1 /*
2 * Copyright 2008 Ben Skeggs
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "nvc0/nvc0_context.h"
24 #include "nvc0/nvc0_resource.h"
25 #include "nvc0/gm107_texture.xml.h"
26 #include "nvc0/nvc0_compute.xml.h"
27 #include "nv50/g80_texture.xml.h"
28 #include "nv50/g80_defs.xml.h"
29
30 #include "util/format/u_format.h"
31
32 #define NVE4_TIC_ENTRY_INVALID 0x000fffff
33 #define NVE4_TSC_ENTRY_INVALID 0xfff00000
34
35 static inline uint32_t
nv50_tic_swizzle(const struct nvc0_format * fmt,unsigned swz,bool tex_int)36 nv50_tic_swizzle(const struct nvc0_format *fmt, unsigned swz, bool tex_int)
37 {
38 switch (swz) {
39 case PIPE_SWIZZLE_X : return fmt->tic.src_x;
40 case PIPE_SWIZZLE_Y: return fmt->tic.src_y;
41 case PIPE_SWIZZLE_Z : return fmt->tic.src_z;
42 case PIPE_SWIZZLE_W: return fmt->tic.src_w;
43 case PIPE_SWIZZLE_1:
44 return tex_int ? G80_TIC_SOURCE_ONE_INT : G80_TIC_SOURCE_ONE_FLOAT;
45 case PIPE_SWIZZLE_0:
46 default:
47 return G80_TIC_SOURCE_ZERO;
48 }
49 }
50
51 struct pipe_sampler_view *
nvc0_create_sampler_view(struct pipe_context * pipe,struct pipe_resource * res,const struct pipe_sampler_view * templ)52 nvc0_create_sampler_view(struct pipe_context *pipe,
53 struct pipe_resource *res,
54 const struct pipe_sampler_view *templ)
55 {
56 uint32_t flags = 0;
57
58 if (templ->target == PIPE_TEXTURE_RECT || templ->target == PIPE_BUFFER)
59 flags |= NV50_TEXVIEW_SCALED_COORDS;
60
61 return nvc0_create_texture_view(pipe, res, templ, flags);
62 }
63
static struct pipe_sampler_view *
gm107_create_texture_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ,
                          uint32_t flags)
{
   /* Build a Maxwell (GM107+) TIC (texture image control) descriptor for a
    * view of "texture".  The eight 32-bit words in view->tic are uploaded
    * into the screen's TIC table later by the validate functions.
    * Returns the embedded pipe_sampler_view, or NULL on allocation failure.
    */
   const struct util_format_description *desc;
   const struct nvc0_format *fmt;
   uint64_t address;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t width, height;
   uint32_t depth;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt;
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;
   mt = nv50_miptree(texture);

   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;

   /* No TIC table slot allocated yet; no bindless handles reference us. */
   view->id = -1;
   view->bindless = 0;

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);
   tex_int = util_format_is_pure_integer(view->pipe.format);

   /* Word 0: hw format plus per-component data types and swizzle sources. */
   fmt = &nvc0_format_table[view->pipe.format];
   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);

   tic[0] = fmt->tic.format << GM107_TIC2_0_COMPONENTS_SIZES__SHIFT;
   tic[0] |= fmt->tic.type_r << GM107_TIC2_0_R_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_g << GM107_TIC2_0_G_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_b << GM107_TIC2_0_B_DATA_TYPE__SHIFT;
   tic[0] |= fmt->tic.type_a << GM107_TIC2_0_A_DATA_TYPE__SHIFT;
   tic[0] |= swz[0] << GM107_TIC2_0_X_SOURCE__SHIFT;
   tic[0] |= swz[1] << GM107_TIC2_0_Y_SOURCE__SHIFT;
   tic[0] |= swz[2] << GM107_TIC2_0_Z_SOURCE__SHIFT;
   tic[0] |= swz[3] << GM107_TIC2_0_W_SOURCE__SHIFT;

   address = mt->base.address;

   /* Words 3/4: quality/promotion defaults; more bits are OR'ed in below. */
   tic[3] = GM107_TIC2_3_LOD_ANISO_QUALITY_2;
   tic[4] = GM107_TIC2_4_SECTOR_PROMOTION_PROMOTE_TO_2_V;
   tic[4] |= GM107_TIC2_4_BORDER_SIZE_SAMPLER_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[4] |= GM107_TIC2_4_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[5] = GM107_TIC2_5_NORMALIZED_COORDS;
   else
      tic[5] = 0;

   /* check for linear storage type */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (texture->target == PIPE_BUFFER) {
         assert(!(tic[5] & GM107_TIC2_5_NORMALIZED_COORDS));
         /* Buffer views: element count minus one, split across the low
          * 16 bits of word 4 and the high part in word 3. */
         width = view->pipe.u.buf.size / (desc->block.bits / 8) - 1;
         address += view->pipe.u.buf.offset;
         tic[2] = GM107_TIC2_2_HEADER_VERSION_ONE_D_BUFFER;
         tic[3] |= width >> 16;
         tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[4] |= width & 0xffff;
      } else {
         /* Pitch-linear layout requires 32-byte pitch alignment. */
         assert(!(mt->level[0].pitch & 0x1f));
         /* must be 2D texture without mip maps */
         tic[2] = GM107_TIC2_2_HEADER_VERSION_PITCH;
         tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] |= mt->level[0].pitch >> 5;
         tic[4] |= mt->base.base.width0 - 1;
         tic[5] |= 0 << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
         tic[5] |= mt->base.base.height0 - 1;
      }
      /* Low/high address halves go in words 1 and 2. */
      tic[1] = address;
      tic[2] |= address >> 32;
      tic[6] = 0;
      tic[7] = 0;
      return &view->pipe;
   }

   /* Tiled (blocklinear) layout: encode the tile mode's y/z block sizes. */
   tic[2] = GM107_TIC2_2_HEADER_VERSION_BLOCKLINEAR;
   tic[3] |=
      ((mt->level[0].tile_mode & 0x0f0) >> 4 << 3) |
      ((mt->level[0].tile_mode & 0xf00) >> 8 << 6);

   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      address += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }
   tic[1] = address;
   tic[2] |= address >> 32;

   switch (templ->target) {
   case PIPE_TEXTURE_1D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_3D:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      /* Cubes are stored as 6 layers per cube; hw wants the cube count. */
      depth /= 6;
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[4] |= GM107_TIC2_4_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   tic[3] |= (flags & NV50_TEXVIEW_FILTER_MSAA8) ?
             GM107_TIC2_3_USE_HEADER_OPT_CONTROL :
             GM107_TIC2_3_LOD_ANISO_QUALITY_HIGH |
             GM107_TIC2_3_LOD_ISO_QUALITY_HIGH;

   /* Resolve/image access addresses individual samples, so the view must
    * cover the sample-expanded dimensions. */
   if (flags & (NV50_TEXVIEW_ACCESS_RESOLVE | NV50_TEXVIEW_IMAGE_GM107)) {
      width = mt->base.base.width0 << mt->ms_x;
      height = mt->base.base.height0 << mt->ms_y;
   } else {
      width = mt->base.base.width0;
      height = mt->base.base.height0;
   }

   tic[4] |= width - 1;

   tic[5] |= (height - 1) & 0xffff;
   tic[5] |= (depth - 1) << GM107_TIC2_5_DEPTH_MINUS_ONE__SHIFT;
   tic[3] |= mt->base.base.last_level << GM107_TIC2_3_MAX_MIP_LEVEL__SHIFT;

   /* sampling points: (?) */
   if ((flags & NV50_TEXVIEW_ACCESS_RESOLVE) && mt->ms_x > 1) {
      tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_MODIFIER_CONST_TWO;
      tic[6] |= GM107_TIC2_6_MAX_ANISOTROPY_2_TO_1;
   } else {
      tic[6] = GM107_TIC2_6_ANISO_FINE_SPREAD_FUNC_TWO;
      tic[6] |= GM107_TIC2_6_ANISO_COARSE_SPREAD_FUNC_ONE;
   }

   /* Word 7: mip level range and multisample mode. */
   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   tic[7] |= mt->ms_mode << GM107_TIC2_7_MULTI_SAMPLE_COUNT__SHIFT;

   return &view->pipe;
}
238
239 struct pipe_sampler_view *
gm107_create_texture_view_from_image(struct pipe_context * pipe,const struct pipe_image_view * view)240 gm107_create_texture_view_from_image(struct pipe_context *pipe,
241 const struct pipe_image_view *view)
242 {
243 struct nv04_resource *res = nv04_resource(view->resource);
244 struct pipe_sampler_view templ = {};
245 enum pipe_texture_target target;
246 uint32_t flags = 0;
247
248 if (!res)
249 return NULL;
250 target = res->base.target;
251
252 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY)
253 target = PIPE_TEXTURE_2D_ARRAY;
254
255 templ.target = target;
256 templ.format = view->format;
257 templ.swizzle_r = PIPE_SWIZZLE_X;
258 templ.swizzle_g = PIPE_SWIZZLE_Y;
259 templ.swizzle_b = PIPE_SWIZZLE_Z;
260 templ.swizzle_a = PIPE_SWIZZLE_W;
261
262 if (target == PIPE_BUFFER) {
263 templ.u.buf.offset = view->u.buf.offset;
264 templ.u.buf.size = view->u.buf.size;
265 } else {
266 templ.u.tex.first_layer = view->u.tex.first_layer;
267 templ.u.tex.last_layer = view->u.tex.last_layer;
268 templ.u.tex.first_level = templ.u.tex.last_level = view->u.tex.level;
269 }
270
271 flags = NV50_TEXVIEW_SCALED_COORDS | NV50_TEXVIEW_IMAGE_GM107;
272
273 return nvc0_create_texture_view(pipe, &res->base, &templ, flags);
274 }
275
static struct pipe_sampler_view *
gf100_create_texture_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ,
                          uint32_t flags)
{
   /* Build a Fermi/Kepler (G80-layout) TIC descriptor for a view of
    * "texture".  The eight 32-bit words in view->tic are uploaded into
    * the screen's TIC table later by the validate functions.
    * Returns the embedded pipe_sampler_view, or NULL on allocation failure.
    */
   const struct util_format_description *desc;
   const struct nvc0_format *fmt;
   uint64_t address;
   uint32_t *tic;
   uint32_t swz[4];
   uint32_t width, height;
   uint32_t depth;
   uint32_t tex_fmt;
   struct nv50_tic_entry *view;
   struct nv50_miptree *mt;
   bool tex_int;

   view = MALLOC_STRUCT(nv50_tic_entry);
   if (!view)
      return NULL;
   mt = nv50_miptree(texture);

   view->pipe = *templ;
   view->pipe.reference.count = 1;
   view->pipe.texture = NULL;
   view->pipe.context = pipe;

   /* No TIC table slot allocated yet; no bindless handles reference us. */
   view->id = -1;
   view->bindless = 0;

   pipe_resource_reference(&view->pipe.texture, texture);

   tic = &view->tic[0];

   desc = util_format_description(view->pipe.format);

   fmt = &nvc0_format_table[view->pipe.format];

   tex_int = util_format_is_pure_integer(view->pipe.format);
   /* Low 6 bits: base format; bit 6 (GK20A extended sizes) is OR'ed into
    * word 0 separately below. */
   tex_fmt = fmt->tic.format & 0x3f;

   /* Word 0: format, per-component data types and swizzle sources. */
   swz[0] = nv50_tic_swizzle(fmt, view->pipe.swizzle_r, tex_int);
   swz[1] = nv50_tic_swizzle(fmt, view->pipe.swizzle_g, tex_int);
   swz[2] = nv50_tic_swizzle(fmt, view->pipe.swizzle_b, tex_int);
   swz[3] = nv50_tic_swizzle(fmt, view->pipe.swizzle_a, tex_int);
   tic[0] = (tex_fmt << G80_TIC_0_COMPONENTS_SIZES__SHIFT) |
            (fmt->tic.type_r << G80_TIC_0_R_DATA_TYPE__SHIFT) |
            (fmt->tic.type_g << G80_TIC_0_G_DATA_TYPE__SHIFT) |
            (fmt->tic.type_b << G80_TIC_0_B_DATA_TYPE__SHIFT) |
            (fmt->tic.type_a << G80_TIC_0_A_DATA_TYPE__SHIFT) |
            (swz[0] << G80_TIC_0_X_SOURCE__SHIFT) |
            (swz[1] << G80_TIC_0_Y_SOURCE__SHIFT) |
            (swz[2] << G80_TIC_0_Z_SOURCE__SHIFT) |
            (swz[3] << G80_TIC_0_W_SOURCE__SHIFT) |
            ((fmt->tic.format & 0x40) << (GK20A_TIC_0_USE_COMPONENT_SIZES_EXTENDED__SHIFT - 6));

   address = mt->base.address;

   /* NOTE(review): 0x10001000 appears to be quality/promotion defaults for
    * word 2 — confirm against the G80 TIC documentation. */
   tic[2] = 0x10001000 | G80_TIC_2_BORDER_SOURCE_COLOR;

   if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
      tic[2] |= G80_TIC_2_SRGB_CONVERSION;

   if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
      tic[2] |= G80_TIC_2_NORMALIZED_COORDS;

   /* check for linear storage type */
   if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
      if (texture->target == PIPE_BUFFER) {
         assert(!(tic[2] & G80_TIC_2_NORMALIZED_COORDS));
         address += view->pipe.u.buf.offset;
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_ONE_D_BUFFER;
         tic[3] = 0;
         tic[4] = /* width */
            view->pipe.u.buf.size / (desc->block.bits / 8);
         tic[5] = 0;
      } else {
         /* must be 2D texture without mip maps */
         tic[2] |= G80_TIC_2_LAYOUT_PITCH | G80_TIC_2_TEXTURE_TYPE_TWO_D_NO_MIPMAP;
         tic[3] = mt->level[0].pitch;
         tic[4] = mt->base.base.width0;
         tic[5] = (1 << 16) | mt->base.base.height0;
      }
      tic[6] =
      tic[7] = 0;
      /* Low/high address halves go in words 1 and 2. */
      tic[1] = address;
      tic[2] |= address >> 32;
      return &view->pipe;
   }

   /* Tiled (blocklinear) layout: encode the tile mode's y/z block sizes. */
   tic[2] |=
      ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
      ((mt->level[0].tile_mode & 0xf00) << (25 - 8));

   depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);

   if (mt->base.base.array_size > 1) {
      /* there doesn't seem to be a base layer field in TIC */
      address += view->pipe.u.tex.first_layer * mt->layer_stride;
      depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
   }
   tic[1] = address;
   tic[2] |= address >> 32;

   switch (templ->target) {
   case PIPE_TEXTURE_1D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D;
      break;
   case PIPE_TEXTURE_2D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_RECT:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D;
      break;
   case PIPE_TEXTURE_3D:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_THREE_D;
      break;
   case PIPE_TEXTURE_CUBE:
      /* Cubes are stored as 6 layers per cube; hw wants the cube count. */
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBEMAP;
      break;
   case PIPE_TEXTURE_1D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_ONE_D_ARRAY;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_TWO_D_ARRAY;
      break;
   case PIPE_TEXTURE_CUBE_ARRAY:
      depth /= 6;
      tic[2] |= G80_TIC_2_TEXTURE_TYPE_CUBE_ARRAY;
      break;
   default:
      unreachable("unexpected/invalid texture target");
   }

   /* NOTE(review): magic LOD-quality bits — confirm against hw docs. */
   tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;

   /* Resolve access addresses individual samples, so the view must cover
    * the sample-expanded dimensions. */
   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
      width = mt->base.base.width0 << mt->ms_x;
      height = mt->base.base.height0 << mt->ms_y;
   } else {
      width = mt->base.base.width0;
      height = mt->base.base.height0;
   }

   tic[4] = (1 << 31) | width;

   tic[5] = height & 0xffff;
   tic[5] |= depth << 16;
   tic[5] |= mt->base.base.last_level << 28;

   /* sampling points: (?) */
   if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
      tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
   else
      tic[6] = 0x03000000;

   /* Word 7: mip level range and multisample mode. */
   tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
   tic[7] |= mt->ms_mode << 12;

   return &view->pipe;
}
440
441 struct pipe_sampler_view *
nvc0_create_texture_view(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ,uint32_t flags)442 nvc0_create_texture_view(struct pipe_context *pipe,
443 struct pipe_resource *texture,
444 const struct pipe_sampler_view *templ,
445 uint32_t flags)
446 {
447 if (nvc0_context(pipe)->screen->tic.maxwell)
448 return gm107_create_texture_view(pipe, texture, templ, flags);
449 return gf100_create_texture_view(pipe, texture, templ, flags);
450 }
451
452 bool
nvc0_update_tic(struct nvc0_context * nvc0,struct nv50_tic_entry * tic,struct nv04_resource * res)453 nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
454 struct nv04_resource *res)
455 {
456 uint64_t address = res->address;
457 if (res->base.target != PIPE_BUFFER)
458 return false;
459 address += tic->pipe.u.buf.offset;
460 if (tic->tic[1] == (uint32_t)address &&
461 (tic->tic[2] & 0xff) == address >> 32)
462 return false;
463
464 tic->tic[1] = address;
465 tic->tic[2] &= 0xffffff00;
466 tic->tic[2] |= address >> 32;
467
468 if (tic->id >= 0) {
469 nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
470 NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
471 tic->tic);
472 return true;
473 }
474
475 return false;
476 }
477
bool
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
{
   /* Fermi path: (re)bind the TIC entries for all textures of shader stage
    * s (s == 5 is compute).  Returns true when the TIC table was written,
    * in which case the caller must emit a TIC_FLUSH. */
   uint32_t commands[32];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   unsigned n = 0;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         /* Empty slot: emit an unbind (valid bit clear) if it changed. */
         if (dirty)
            commands[n++] = (i << 1) | 0;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      /* Buffer textures may have moved; refresh the address in the TIC. */
      need_flush |= nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         /* First use: allocate a TIC slot and upload the descriptor. */
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                              NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                              tic->tic);
         need_flush = true;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         /* The GPU may have written this texture; invalidate its entry in
          * the texture cache (compute uses a separate method). */
         if (unlikely(s == 5))
            BEGIN_NVC0(push, NVC0_CP(TEX_CACHE_CTL), 1);
         else
            BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
         NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
      }
      /* Lock the slot so it can't be evicted while bound. */
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

      if (!dirty)
         continue;
      /* Bind command: TIC id in bits 9+, slot index in bits 1+, valid bit. */
      commands[n++] = (tic->id << 9) | (i << 1) | 1;

      if (unlikely(s == 5))
         BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
      else
         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
   }
   /* Unbind slots that were populated last time but no longer are. */
   for (; i < nvc0->state.num_textures[s]; ++i)
      commands[n++] = (i << 1) | 0;

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   if (n) {
      if (unlikely(s == 5))
         BEGIN_NIC0(push, NVC0_CP(BIND_TIC), n);
      else
         BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->textures_dirty[s] = 0;

   return need_flush;
}
546
static bool
nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
{
   /* Kepler+ path: ensure all textures of stage s are resident in the TIC
    * table and fold their TIC ids into the combined tex_handles words
    * (uploaded to the aux constant buffer by nve4_set_tex_handles).
    * Returns true when the TIC table was written (TIC_FLUSH needed). */
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_textures[s]; ++i) {
      struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
      struct nv04_resource *res;
      const bool dirty = !!(nvc0->textures_dirty[s] & (1 << i));

      if (!tic) {
         /* Empty slot: mark the handle's TIC part invalid. */
         nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
         continue;
      }
      res = nv04_resource(tic->pipe.texture);
      /* Buffer textures may have moved; refresh the address in the TIC. */
      need_flush |= nvc0_update_tic(nvc0, tic, res);

      if (tic->id < 0) {
         /* First use: allocate a TIC slot and upload the descriptor. */
         tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

         nvc0->base.push_data(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                              NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                              tic->tic);
         need_flush = true;
      } else
      if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
         /* The GPU may have written this texture; invalidate its entry in
          * the texture cache. */
         BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
         PUSH_DATA (push, (tic->id << 4) | 1);
      }
      /* Lock the slot so it can't be evicted while bound. */
      nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

      res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
      res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

      /* TIC id occupies the low 20 bits of the combined handle. */
      nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tic->id;
      if (dirty)
         BCTX_REFN(nvc0->bufctx_3d, 3D_TEX(s, i), res, RD);
   }
   /* Invalidate handles for slots that are no longer populated. */
   for (; i < nvc0->state.num_textures[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
      nvc0->textures_dirty[s] |= 1 << i;
   }

   nvc0->state.num_textures[s] = nvc0->num_textures[s];

   return need_flush;
}
597
nvc0_validate_textures(struct nvc0_context * nvc0)598 void nvc0_validate_textures(struct nvc0_context *nvc0)
599 {
600 bool need_flush = false;
601 int i;
602
603 for (i = 0; i < 5; i++) {
604 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
605 need_flush |= nve4_validate_tic(nvc0, i);
606 else
607 need_flush |= nvc0_validate_tic(nvc0, i);
608 }
609
610 if (need_flush) {
611 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
612 PUSH_DATA (nvc0->base.pushbuf, 0);
613 }
614
615 /* Invalidate all CP textures because they are aliased. */
616 for (int i = 0; i < nvc0->num_textures[5]; i++)
617 nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_TEX(i));
618 nvc0->textures_dirty[5] = ~0;
619 nvc0->dirty_cp |= NVC0_NEW_CP_TEXTURES;
620 }
621
bool
nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
{
   /* Fermi path: (re)bind the TSC (sampler state) entries for stage s
    * (s == 5 is compute).  Returns true when the TSC table was written,
    * in which case the caller must emit a TSC_FLUSH. */
   uint32_t commands[16];
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   unsigned i;
   unsigned n = 0;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      if (!(nvc0->samplers_dirty[s] & (1 << i)))
         continue;
      if (!tsc) {
         /* Empty slot: emit an unbind (valid bit clear). */
         commands[n++] = (i << 4) | 0;
         continue;
      }
      nvc0->seamless_cube_map = tsc->seamless_cube_map;
      if (tsc->id < 0) {
         /* First use: allocate a TSC slot and upload the sampler state.
          * The TSC table starts at offset 65536 in the txc buffer. */
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                               65536 + tsc->id * 32, NV_VRAM_DOMAIN(&nvc0->screen->base),
                               32, tsc->tsc);
         need_flush = true;
      }
      /* Lock the slot so it can't be evicted while bound. */
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      /* Bind command: TSC id in bits 12+, slot in bits 4+, valid bit 0. */
      commands[n++] = (tsc->id << 12) | (i << 4) | 1;
   }
   /* Unbind slots that were populated last time but no longer are. */
   for (; i < nvc0->state.num_samplers[s]; ++i)
      commands[n++] = (i << 4) | 0;

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   // TXF, in unlinked tsc mode, will always use sampler 0. So we have to
   // ensure that it remains bound. Its contents don't matter, all samplers we
   // ever create have the SRGB_CONVERSION bit set, so as long as the first
   // entry is initialized, we're good to go. This is the only bit that has
   // any effect on what TXF does.
   if ((nvc0->samplers_dirty[s] & 1) && !nvc0->samplers[s][0]) {
      if (n == 0)
         n = 1;
      // We're guaranteed that the first command refers to the first slot, so
      // we're not overwriting a valid entry.
      commands[0] = (0 << 12) | (0 << 4) | 1;
   }

   if (n) {
      if (unlikely(s == 5))
         BEGIN_NIC0(push, NVC0_CP(BIND_TSC), n);
      else
         BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
      PUSH_DATAp(push, commands, n);
   }
   nvc0->samplers_dirty[s] = 0;

   return need_flush;
}
682
bool
nve4_validate_tsc(struct nvc0_context *nvc0, int s)
{
   /* Kepler+ path: ensure all samplers of stage s are resident in the TSC
    * table and fold their TSC ids into the combined tex_handles words.
    * Returns true when the TSC table was written (TSC_FLUSH needed). */
   unsigned i;
   bool need_flush = false;

   for (i = 0; i < nvc0->num_samplers[s]; ++i) {
      struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);

      if (!tsc) {
         /* Empty slot: mark the handle's TSC part invalid. */
         nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
         continue;
      }
      if (tsc->id < 0) {
         /* First use: allocate a TSC slot and upload the sampler state.
          * The TSC table starts at offset 65536 in the txc buffer. */
         tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);

         nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                               65536 + tsc->id * 32,
                               NV_VRAM_DOMAIN(&nvc0->screen->base),
                               32, tsc->tsc);
         need_flush = true;
      }
      /* Lock the slot so it can't be evicted while bound. */
      nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

      /* TSC id occupies bits 20..31 of the combined handle. */
      nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
      nvc0->tex_handles[s][i] |= tsc->id << 20;
   }
   /* Invalidate handles for slots that are no longer populated. */
   for (; i < nvc0->state.num_samplers[s]; ++i) {
      nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
      nvc0->samplers_dirty[s] |= 1 << i;
   }

   nvc0->state.num_samplers[s] = nvc0->num_samplers[s];

   return need_flush;
}
719
nvc0_validate_samplers(struct nvc0_context * nvc0)720 void nvc0_validate_samplers(struct nvc0_context *nvc0)
721 {
722 bool need_flush = false;
723 int i;
724
725 for (i = 0; i < 5; i++) {
726 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
727 need_flush |= nve4_validate_tsc(nvc0, i);
728 else
729 need_flush |= nvc0_validate_tsc(nvc0, i);
730 }
731
732 if (need_flush) {
733 BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
734 PUSH_DATA (nvc0->base.pushbuf, 0);
735 }
736
737 /* Invalidate all CP samplers because they are aliased. */
738 nvc0->samplers_dirty[5] = ~0;
739 nvc0->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
740 }
741
742 void
nvc0_upload_tsc0(struct nvc0_context * nvc0)743 nvc0_upload_tsc0(struct nvc0_context *nvc0)
744 {
745 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
746 u32 data[8] = { G80_TSC_0_SRGB_CONVERSION };
747 nvc0->base.push_data(&nvc0->base, nvc0->screen->txc,
748 65536 /*+ tsc->id * 32*/,
749 NV_VRAM_DOMAIN(&nvc0->screen->base), 32, data);
750 BEGIN_NVC0(push, NVC0_3D(TSC_FLUSH), 1);
751 PUSH_DATA (push, 0);
752 }
753
754 /* Upload the "diagonal" entries for the possible texture sources ($t == $s).
755 * At some point we might want to get a list of the combinations used by a
756 * shader and fill in those entries instead of having it extract the handles.
757 */
void
nve4_set_tex_handles(struct nvc0_context *nvc0)
{
   /* Upload the combined TIC/TSC handle words for all dirty texture or
    * sampler slots of each graphics stage into that stage's auxiliary
    * constant buffer.  Kepler+ only; earlier chips use BIND_TIC/BIND_TSC. */
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   unsigned s;

   if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
      return;

   for (s = 0; s < 5; ++s) {
      /* A slot needs re-upload if either its texture or sampler changed. */
      uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
      if (!dirty)
         continue;
      /* Point constant-buffer uploads at this stage's aux buffer. */
      BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      do {
         /* Process dirty slots lowest-bit first. */
         int i = ffs(dirty) - 1;
         dirty &= ~(1 << i);

         BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
         PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(i));
         PUSH_DATA (push, nvc0->tex_handles[s][i]);
      } while (dirty);

      nvc0->textures_dirty[s] = 0;
      nvc0->samplers_dirty[s] = 0;
   }
}
789
static uint64_t
nve4_create_texture_handle(struct pipe_context *pipe,
                           struct pipe_sampler_view *view,
                           const struct pipe_sampler_state *sampler)
{
   /* We have to create persistent handles that won't change for these objects
    * That means that we have to upload them into place and lock them so that
    * they can't be kicked out later.
    *
    * Returns 0 on failure, otherwise a bindless handle with bit 32 set,
    * the TSC id in bits 20..31 and the TIC id in bits 0..19.
    */
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nv50_tic_entry *tic = nv50_tic_entry(view);
   struct nv50_tsc_entry *tsc = pipe->create_sampler_state(pipe, sampler);
   struct pipe_sampler_view *v = NULL;

   tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
   if (tsc->id < 0)
      goto fail;

   if (tic->id < 0) {
      /* View not yet resident: allocate a slot and upload its descriptor. */
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
      if (tic->id < 0)
         goto fail;

      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                            tic->tic);

      IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);
   }

   /* The TSC table starts at offset 65536 in the txc buffer. */
   nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc,
                         65536 + tsc->id * 32,
                         NV_VRAM_DOMAIN(&nvc0->screen->base),
                         32, tsc->tsc);

   IMMED_NVC0(push, NVC0_3D(TSC_FLUSH), 0);

   // Add an extra reference to this sampler view effectively held by this
   // texture handle. This is to deal with the sampler view being dereferenced
   // before the handle is. However we need the view to still be live until the
   // handle to it is deleted.
   pipe_sampler_view_reference(&v, view);
   p_atomic_inc(&tic->bindless);

   /* Lock both slots so they can't be evicted while the handle lives. */
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
   nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);

   return 0x100000000ULL | (tsc->id << 20) | tic->id;

fail:
   /* Frees the sampler state; the screen alloc helpers own the table ids. */
   pipe->delete_sampler_state(pipe, tsc);
   return 0;
}
844
845 static bool
view_bound(struct nvc0_context * nvc0,struct pipe_sampler_view * view)846 view_bound(struct nvc0_context *nvc0, struct pipe_sampler_view *view) {
847 for (int s = 0; s < 6; s++) {
848 for (int i = 0; i < nvc0->num_textures[s]; i++)
849 if (nvc0->textures[s][i] == view)
850 return true;
851 }
852 return false;
853 }
854
static void
nve4_delete_texture_handle(struct pipe_context *pipe, uint64_t handle)
{
   /* Destroy a bindless texture handle created by
    * nve4_create_texture_handle: drop the extra view reference, release
    * the TIC slot if nothing else binds the view, and free the sampler. */
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   /* Handle layout: TIC id in bits 0..19, TSC id in bits 20..31. */
   uint32_t tic = handle & NVE4_TIC_ENTRY_INVALID;
   uint32_t tsc = (handle & NVE4_TSC_ENTRY_INVALID) >> 20;
   struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];

   if (entry) {
      struct pipe_sampler_view *view = &entry->pipe;
      assert(entry->bindless);
      /* Drop the bindless count taken at handle creation. */
      p_atomic_dec(&entry->bindless);
      /* Only unlock the TIC slot if no shader stage still binds the view. */
      if (!view_bound(nvc0, view))
         nvc0_screen_tic_unlock(nvc0->screen, entry);
      /* Release the reference added in nve4_create_texture_handle. */
      pipe_sampler_view_reference(&view, NULL);
   }

   pipe->delete_sampler_state(pipe, nvc0->screen->tsc.entries[tsc]);
}
874
875 static void
nve4_make_texture_handle_resident(struct pipe_context * pipe,uint64_t handle,bool resident)876 nve4_make_texture_handle_resident(struct pipe_context *pipe,
877 uint64_t handle, bool resident)
878 {
879 struct nvc0_context *nvc0 = nvc0_context(pipe);
880 if (resident) {
881 struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
882 struct nv50_tic_entry *tic =
883 nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
884 assert(tic);
885 assert(tic->bindless);
886
887 res->handle = handle;
888 res->buf = nv04_resource(tic->pipe.texture);
889 res->flags = NOUVEAU_BO_RD;
890 list_add(&res->list, &nvc0->tex_head);
891 } else {
892 list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->tex_head, list) {
893 if (pos->handle == handle) {
894 list_del(&pos->list);
895 free(pos);
896 break;
897 }
898 }
899 }
900 }
901
/* Per-format lookup tables for surface (image) access.  A zero entry in
 * nve4_su_format_map means the format is unsupported (see
 * nve4_set_surface_info).  NOTE(review): these look like forward
 * declarations — the initialized definitions presumably appear later in
 * this file; confirm. */
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
905
906 static void
nvc0_get_surface_dims(const struct pipe_image_view * view,int * width,int * height,int * depth)907 nvc0_get_surface_dims(const struct pipe_image_view *view,
908 int *width, int *height, int *depth)
909 {
910 struct nv04_resource *res = nv04_resource(view->resource);
911 int level;
912
913 *width = *height = *depth = 1;
914 if (res->base.target == PIPE_BUFFER) {
915 *width = view->u.buf.size / util_format_get_blocksize(view->format);
916 return;
917 }
918
919 level = view->u.tex.level;
920 *width = u_minify(view->resource->width0, level);
921 *height = u_minify(view->resource->height0, level);
922 *depth = u_minify(view->resource->depth0, level);
923
924 switch (res->base.target) {
925 case PIPE_TEXTURE_1D_ARRAY:
926 case PIPE_TEXTURE_2D_ARRAY:
927 case PIPE_TEXTURE_CUBE:
928 case PIPE_TEXTURE_CUBE_ARRAY:
929 *depth = view->u.tex.last_layer - view->u.tex.first_layer + 1;
930 break;
931 case PIPE_TEXTURE_1D:
932 case PIPE_TEXTURE_2D:
933 case PIPE_TEXTURE_RECT:
934 case PIPE_TEXTURE_3D:
935 break;
936 default:
937 assert(!"unexpected texture target");
938 break;
939 }
940 }
941
942 void
nvc0_mark_image_range_valid(const struct pipe_image_view * view)943 nvc0_mark_image_range_valid(const struct pipe_image_view *view)
944 {
945 struct nv04_resource *res = (struct nv04_resource *)view->resource;
946
947 assert(view->resource->target == PIPE_BUFFER);
948
949 util_range_add(&res->base, &res->valid_buffer_range,
950 view->u.buf.offset,
951 view->u.buf.offset + view->u.buf.size);
952 }
953
/* Emit the 16-dword surface information block that the Kepler surface
 * lowering (SULD/SUST) code reads from the driver constant buffer.
 *
 * Layout as written here:
 *   info[0]      surface address >> 8
 *   info[1]      image format plus block-size / aux bits
 *   info[2..7]   per-target layout (width/pitch/tiling/layer stride/z)
 *   info[8..10]  width/height/depth (for imageSize())
 *   info[11]     dimensionality code (1/2/3/4, see switch below)
 *   info[12]     bytes per pixel (or, for the invalid case, a library offset)
 *   info[13]     limit in bytes for raw access
 *   info[14..15] MS sample shift in x/y
 *
 * Always advances push->cur by 16 dwords, even for an invalid view.
 */
void
nve4_set_surface_info(struct nouveau_pushbuf *push,
                      const struct pipe_image_view *view,
                      struct nvc0_context *nvc0)
{
   struct nvc0_screen *screen = nvc0->screen;
   struct nv04_resource *res;
   uint64_t address;
   uint32_t *const info = push->cur;
   int width, height, depth;
   uint8_t log2cpp;

   if (view && !nve4_su_format_map[view->format])
      NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");

   push->cur += 16;

   /* Unbound or unsupported view: emit a recognizable "invalid" block.
    * 0xbadf0000 is a poison address; the remaining fields are presumably
    * chosen so the shader library path fails safely -- TODO confirm. */
   if (!view || !nve4_su_format_map[view->format]) {
      memset(info, 0, 16 * sizeof(*info));

      info[0] = 0xbadf0000;
      info[1] = 0x80004000;
      info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
         screen->lib_code->start;
      return;
   }
   res = nv04_resource(view->resource);

   address = res->address;

   /* get surface dimensions based on the target. */
   nvc0_get_surface_dims(view, &width, &height, &depth);

   info[8] = width;
   info[9] = height;
   info[10] = depth;
   /* Dimensionality code consumed by the lowering code. */
   switch (res->base.target) {
   case PIPE_TEXTURE_1D_ARRAY:
      info[11] = 1;
      break;
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_RECT:
      info[11] = 2;
      break;
   case PIPE_TEXTURE_3D:
      info[11] = 3;
      break;
   case PIPE_TEXTURE_2D_ARRAY:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_CUBE_ARRAY:
      info[11] = 4;
      break;
   default:
      info[11] = 0;
      break;
   }
   /* Bits 15:12 of the aux map hold log2(bytes per pixel). */
   log2cpp = (0xf000 & nve4_su_format_aux_map[view->format]) >> 12;

   /* Stick the blockwidth (ie. number of bytes per pixel) to check if the
    * format doesn't mismatch. */
   info[12] = util_format_get_blocksize(view->format);

   /* limit in bytes for raw access */
   info[13] = (0x06 << 22) | ((width << log2cpp) - 1);

   info[1] = nve4_su_format_map[view->format];

#if 0
   switch (util_format_get_blocksizebits(view->format)) {
   case 16: info[1] |= 1 << 16; break;
   case 32: info[1] |= 2 << 16; break;
   case 64: info[1] |= 3 << 16; break;
   case 128: info[1] |= 4 << 16; break;
   default:
      break;
   }
#else
   /* Same block-size class as the #if 0 path above, taken from the aux
    * table instead of recomputing it. */
   info[1] |= log2cpp << 16;
   info[1] |= 0x4000;
   info[1] |= (0x0f00 & nve4_su_format_aux_map[view->format]);
#endif

   if (res->base.target == PIPE_BUFFER) {
      /* Buffers: linear layout, only address/width are meaningful. */
      address += view->u.buf.offset;

      info[0] = address >> 8;
      info[2] = width - 1;
      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
      info[3] = 0;
      info[4] = 0;
      info[5] = 0;
      info[6] = 0;
      info[7] = 0;
      info[14] = 0;
      info[15] = 0;
   } else {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
      unsigned z = view->u.tex.first_layer;

      /* For 2D array layouts the layer offset is folded into the base
       * address; only true 3D layouts keep a z coordinate. */
      if (!mt->layout_3d) {
         address += mt->layer_stride * z;
         z = 0;
      }

      address += lvl->offset;

      info[0] = address >> 8;
      info[2] = (width << mt->ms_x) - 1;
      /* NOTE: this is really important: */
      info[2] |= (0xff & nve4_su_format_aux_map[view->format]) << 22;
      info[3] = (0x88 << 24) | (lvl->pitch / 64);
      info[4] = (height << mt->ms_y) - 1;
      info[4] |= (lvl->tile_mode & 0x0f0) << 25;
      info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
      info[5] = mt->layer_stride >> 8;
      info[6] = depth - 1;
      info[6] |= (lvl->tile_mode & 0xf00) << 21;
      info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
      info[7] = mt->layout_3d ? 1 : 0;
      info[7] |= z << 16;
      info[14] = mt->ms_x;
      info[15] = mt->ms_y;
   }
}
1079
/* Fermi counterpart of nve4_set_surface_info(): emit a 16-dword surface
 * information block into the pushbuf.  The layout intentionally differs
 * from nve4 (see the NOTE below); a zeroed block means "image not bound".
 * Always advances push->cur by 16 dwords. */
static inline void
nvc0_set_surface_info(struct nouveau_pushbuf *push,
                      const struct pipe_image_view *view, uint64_t address,
                      int width, int height, int depth)
{
   struct nv04_resource *res;
   uint32_t *const info = push->cur;

   push->cur += 16;

   /* Make sure to always initialize the surface information area because it's
    * used to check if the given image is bound or not. */
   memset(info, 0, 16 * sizeof(*info));

   if (!view || !view->resource)
      return;
   res = nv04_resource(view->resource);

   /* Stick the image dimensions for the imageSize() builtin. */
   info[8] = width;
   info[9] = height;
   info[10] = depth;

   /* Stick the blockwidth (ie. number of bytes per pixel) to calculate pixel
    * offset and to check if the format doesn't mismatch. */
   info[12] = ffs(util_format_get_blocksize(view->format)) - 1;

   if (res->base.target == PIPE_BUFFER) {
      /* Buffers only need a base address and an element count. */
      info[0] = address >> 8;
      info[2] = width;
   } else {
      struct nv50_miptree *mt = nv50_miptree(&res->base);
      struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
      /* z only matters for 3D layouts; array layers were already folded
       * into 'address' by the caller. */
      unsigned z = mt->layout_3d ? view->u.tex.first_layer : 0;
      /* Number of block rows, rounded up to a whole tile. */
      unsigned nby = align(util_format_get_nblocksy(view->format, height),
                           NVC0_TILE_SIZE_Y(lvl->tile_mode));

      /* NOTE: this does not precisely match nve4; the values are made to be
       * easier for the shader to consume.
       */
      info[0] = address >> 8;
      info[2] = (NVC0_TILE_SHIFT_X(lvl->tile_mode) - info[12]) << 24;
      info[4] = NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 24 | nby;
      info[5] = mt->layer_stride >> 8;
      info[6] = NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 24;
      info[7] = z;
      info[14] = mt->ms_x;
      info[15] = mt->ms_y;
   }
}
1130
/* Bind all images of shader stage 's' as 2D render-target-style surfaces
 * (the pre-Kepler image path) and mirror their layout description into the
 * driver's auxiliary constant buffer.
 *
 * s == 5 is the compute stage: it uses the compute-class (CP) methods and
 * buffer context; every other stage goes through the 3D class. */
void
nvc0_validate_suf(struct nvc0_context *nvc0, int s)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;

   for (int i = 0; i < NVC0_MAX_IMAGES; ++i) {
      struct pipe_image_view *view = &nvc0->images[s][i];
      int width, height, depth;
      uint64_t address = 0;

      if (s == 5)
         BEGIN_NVC0(push, NVC0_CP(IMAGE(i)), 6);
      else
         BEGIN_NVC0(push, NVC0_3D(IMAGE(i)), 6);

      if (view->resource) {
         struct nv04_resource *res = nv04_resource(view->resource);
         unsigned rt = nvc0_format_table[view->format].rt;

         /* Build the surface format word; 0x14 presumably selects the
          * non-depth surface layout -- TODO confirm against class docs. */
         if (util_format_is_depth_or_stencil(view->format))
            rt = rt << 12;
         else
            rt = (rt << 4) | (0x14 << 12);

         /* get surface dimensions based on the target. */
         nvc0_get_surface_dims(view, &width, &height, &depth);

         address = res->address;
         if (res->base.target == PIPE_BUFFER) {
            unsigned blocksize = util_format_get_blocksize(view->format);

            address += view->u.buf.offset;
            /* Surface addresses must be 256-byte aligned. */
            assert(!(address & 0xff));

            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               nvc0_mark_image_range_valid(view);

            /* Linear "1 pixel high" surface covering the buffer. */
            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, align(width * blocksize, 0x100));
            PUSH_DATA (push, NVC0_3D_IMAGE_HEIGHT_LINEAR | 1);
            PUSH_DATA (push, rt);
            PUSH_DATA (push, 0);
         } else {
            struct nv50_miptree *mt = nv50_miptree(view->resource);
            struct nv50_miptree_level *lvl = &mt->level[view->u.tex.level];
            unsigned adjusted_width = width, adjusted_height = height;

            if (mt->layout_3d) {
               // We have to adjust the size of the 3d surface to be
               // accessible within 2d limits. The size of each z tile goes
               // into the x direction, while the number of z tiles goes into
               // the y direction.
               const unsigned nbx = util_format_get_nblocksx(view->format, width);
               const unsigned nby = util_format_get_nblocksy(view->format, height);
               const unsigned tsx = NVC0_TILE_SIZE_X(lvl->tile_mode);
               const unsigned tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);
               const unsigned tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);

               adjusted_width = align(nbx, tsx / util_format_get_blocksize(view->format)) * tsz;
               adjusted_height = align(nby, tsy) * align(depth, tsz) >> NVC0_TILE_SHIFT_Z(lvl->tile_mode);
            } else {
               /* 2D array layout: select the first layer by address. */
               const unsigned z = view->u.tex.first_layer;
               address += mt->layer_stride * z;
            }
            address += lvl->offset;

            PUSH_DATAh(push, address);
            PUSH_DATA (push, address);
            PUSH_DATA (push, adjusted_width << mt->ms_x);
            PUSH_DATA (push, adjusted_height << mt->ms_y);
            PUSH_DATA (push, rt);
            PUSH_DATA (push, lvl->tile_mode & 0xff); /* mask out z-tiling */
         }

         if (s == 5)
            BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
         else
            BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);
      } else {
         /* No image bound in this slot: emit a null surface. */
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0);
         PUSH_DATA(push, 0x14000);
         PUSH_DATA(push, 0);
      }

      /* stick surface information into the driver constant buffer */
      if (s == 5)
         BEGIN_NVC0(push, NVC0_CP(CB_SIZE), 3);
      else
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
      PUSH_DATA (push, NVC0_CB_AUX_SIZE);
      PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
      if (s == 5)
         BEGIN_1IC0(push, NVC0_CP(CB_POS), 1 + 16);
      else
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
      PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));

      /* Note: width/height/depth are only initialized when a resource is
       * bound, but in the unbound case nvc0_set_surface_info() returns
       * right after zeroing the block without reading them. */
      nvc0_set_surface_info(push, view, address, width, height, depth);
   }
}
1237
1238 static inline void
nvc0_update_surface_bindings(struct nvc0_context * nvc0)1239 nvc0_update_surface_bindings(struct nvc0_context *nvc0)
1240 {
1241 nvc0_validate_suf(nvc0, 4);
1242
1243 /* Invalidate all COMPUTE images because they are aliased with FRAGMENT. */
1244 nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
1245 nvc0->dirty_cp |= NVC0_NEW_CP_SURFACES;
1246 nvc0->images_dirty[5] |= nvc0->images_valid[5];
1247 }
1248
/* GM107+ accesses images through TIC entries rather than surface
 * descriptors: make sure the TIC for images_tic[stage][slot] is uploaded
 * and its id written into the aux constant buffer (at TEX_INFO slot+32). */
static void
gm107_validate_surfaces(struct nvc0_context *nvc0,
                        struct pipe_image_view *view, int stage, int slot)
{
   struct nv04_resource *res = nv04_resource(view->resource);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   struct nv50_tic_entry *tic;

   tic = nv50_tic_entry(nvc0->images_tic[stage][slot]);

   /* NOTE(review): the initializer of 'res' above is dead -- it is
    * unconditionally overwritten from the TIC's texture right here. */
   res = nv04_resource(tic->pipe.texture);
   nvc0_update_tic(nvc0, tic, res);

   if (tic->id < 0) {
      /* No TIC slot yet: allocate one and upload the descriptor. */
      tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);

      /* upload the texture view */
      nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                            NV_VRAM_DOMAIN(&nvc0->screen->base), 32, tic->tic);

      BEGIN_NVC0(push, NVC0_3D(TIC_FLUSH), 1);
      PUSH_DATA (push, 0);
   } else
   if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
      /* Resource was written by the GPU: invalidate the texture cache
       * for this TIC entry. */
      BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
      PUSH_DATA (push, (tic->id << 4) | 1);
   }
   /* Keep the entry locked so it is not evicted while bound. */
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

   res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
   res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;

   BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RD);

   /* upload the texture handle */
   BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
   PUSH_DATA (push, NVC0_CB_AUX_SIZE);
   PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(stage));
   BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
   PUSH_DATA (push, NVC0_CB_AUX_TEX_INFO(slot + 32));
   PUSH_DATA (push, tic->id);
}
1293
/* Kepler+ path: for every graphics stage (0..4) with dirty images, write
 * each image's 16-dword surface info block into that stage's aux constant
 * buffer; on GM107+ additionally validate the TIC-based view. */
static inline void
nve4_update_surface_bindings(struct nvc0_context *nvc0)
{
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct nvc0_screen *screen = nvc0->screen;
   int i, j, s;

   for (s = 0; s < 5; s++) {
      if (!nvc0->images_dirty[s])
         continue;

      for (i = 0; i < NVC0_MAX_IMAGES; ++i) {
         struct pipe_image_view *view = &nvc0->images[s][i];

         /* Point CB upload at this stage's aux buffer, then stream the
          * 16-dword info block for image slot i. */
         BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
         PUSH_DATA (push, NVC0_CB_AUX_SIZE);
         PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
         PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
         BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
         PUSH_DATA (push, NVC0_CB_AUX_SU_INFO(i));

         if (view->resource) {
            struct nv04_resource *res = nv04_resource(view->resource);

            if (res->base.target == PIPE_BUFFER) {
               if (view->access & PIPE_IMAGE_ACCESS_WRITE)
                  nvc0_mark_image_range_valid(view);
            }

            nve4_set_surface_info(push, view, nvc0);
            BCTX_REFN(nvc0->bufctx_3d, 3D_SUF, res, RDWR);

            if (nvc0->screen->base.class_3d >= GM107_3D_CLASS)
               gm107_validate_surfaces(nvc0, view, s, i);
         } else {
            /* Unbound slot: a zeroed block marks the image as not bound. */
            for (j = 0; j < 16; j++)
               PUSH_DATA(push, 0);
         }
      }
   }
}
1335
1336 void
nvc0_validate_surfaces(struct nvc0_context * nvc0)1337 nvc0_validate_surfaces(struct nvc0_context *nvc0)
1338 {
1339 if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
1340 nve4_update_surface_bindings(nvc0);
1341 } else {
1342 nvc0_update_surface_bindings(nvc0);
1343 }
1344 }
1345
1346 static uint64_t
nve4_create_image_handle(struct pipe_context * pipe,const struct pipe_image_view * view)1347 nve4_create_image_handle(struct pipe_context *pipe,
1348 const struct pipe_image_view *view)
1349 {
1350 struct nvc0_context *nvc0 = nvc0_context(pipe);
1351 struct nouveau_pushbuf *push = nvc0->base.pushbuf;
1352 struct nvc0_screen *screen = nvc0->screen;
1353 int i = screen->img.next, s;
1354
1355 while (screen->img.entries[i]) {
1356 i = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
1357 if (i == screen->img.next)
1358 return 0;
1359 }
1360
1361 screen->img.next = (i + 1) & (NVE4_IMG_MAX_HANDLES - 1);
1362 screen->img.entries[i] = calloc(1, sizeof(struct pipe_image_view));
1363 *screen->img.entries[i] = *view;
1364
1365 for (s = 0; s < 6; s++) {
1366 BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
1367 PUSH_DATA (push, NVC0_CB_AUX_SIZE);
1368 PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1369 PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(s));
1370 BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 16);
1371 PUSH_DATA (push, NVC0_CB_AUX_BINDLESS_INFO(i));
1372 nve4_set_surface_info(push, view, nvc0);
1373 }
1374
1375 return 0x100000000ULL | i;
1376 }
1377
1378 static void
nve4_delete_image_handle(struct pipe_context * pipe,uint64_t handle)1379 nve4_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
1380 {
1381 struct nvc0_context *nvc0 = nvc0_context(pipe);
1382 struct nvc0_screen *screen = nvc0->screen;
1383 int i = handle & (NVE4_IMG_MAX_HANDLES - 1);
1384
1385 free(screen->img.entries[i]);
1386 screen->img.entries[i] = NULL;
1387 }
1388
/* Add or remove a Kepler bindless image handle from the context's resident
 * list; resident resources are referenced on every submission. */
static void
nve4_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
                                unsigned access, bool resident)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nvc0_screen *screen = nvc0->screen;

   if (resident) {
      /* NOTE(review): calloc result is not checked before use; on OOM this
       * dereferences NULL.  See nve4_create_image_handle for the pattern. */
      struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
      struct pipe_image_view *view =
         screen->img.entries[handle & (NVE4_IMG_MAX_HANDLES - 1)];
      assert(view);

      if (view->resource->target == PIPE_BUFFER &&
          access & PIPE_IMAGE_ACCESS_WRITE)
         nvc0_mark_image_range_valid(view);
      res->handle = handle;
      res->buf = nv04_resource(view->resource);
      /* Access bits are stored shifted; consumed when emitting relocs --
       * exact meaning of the shift is defined elsewhere. */
      res->flags = (access & 3) << 8;
      list_add(&res->list, &nvc0->img_head);
   } else {
      /* Find and drop the matching entry from the resident list. */
      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
         if (pos->handle == handle) {
            list_del(&pos->list);
            free(pos);
            break;
         }
      }
   }
}
1419
/* Create a bindless image handle on Maxwell+.
 * Returns 0 on failure; otherwise 0x100000000 | tic_id, with extra bits
 * describing a bound 3D surface layer when applicable. */
static uint64_t
gm107_create_image_handle(struct pipe_context *pipe,
                          const struct pipe_image_view *view)
{
   /* GM107+ use TIC handles to reference images. As such, image handles are
    * just the TIC id.
    */
   struct nvc0_context *nvc0 = nvc0_context(pipe);
   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
   struct pipe_sampler_view *sview =
      gm107_create_texture_view_from_image(pipe, view);
   struct nv50_tic_entry *tic = nv50_tic_entry(sview);

   if (tic == NULL)
      goto fail;

   tic->bindless = 1;
   tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
   if (tic->id < 0)
      goto fail;

   /* Upload the TIC descriptor and flush the TIC cache. */
   nve4_p2mf_push_linear(&nvc0->base, nvc0->screen->txc, tic->id * 32,
                         NV_VRAM_DOMAIN(&nvc0->screen->base), 32,
                         tic->tic);

   IMMED_NVC0(push, NVC0_3D(TIC_FLUSH), 0);

   /* Keep the entry locked so it survives eviction while resident. */
   nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);

   // Compute handle. This will include the TIC as well as some additional
   // info regarding the bound 3d surface layer, if applicable.
   uint64_t handle = 0x100000000ULL | tic->id;
   struct nv04_resource *res = nv04_resource(view->resource);
   if (res->base.target == PIPE_TEXTURE_3D) {
      handle |= 1 << 11;
      handle |= view->u.tex.first_layer << (11 + 16);
   }
   return handle;

fail:
   /* NOTE(review): on failure the entry is released with FREE() rather than
    * pipe_sampler_view_reference(); presumably safe because the view was
    * never published -- TODO confirm against the view's destroy path. */
   FREE(tic);
   return 0;
}
1463
1464 static void
gm107_delete_image_handle(struct pipe_context * pipe,uint64_t handle)1465 gm107_delete_image_handle(struct pipe_context *pipe, uint64_t handle)
1466 {
1467 struct nvc0_context *nvc0 = nvc0_context(pipe);
1468 int tic = handle & NVE4_TIC_ENTRY_INVALID;
1469 struct nv50_tic_entry *entry = nvc0->screen->tic.entries[tic];
1470 struct pipe_sampler_view *view = &entry->pipe;
1471 assert(entry->bindless == 1);
1472 assert(!view_bound(nvc0, view));
1473 entry->bindless = 0;
1474 nvc0_screen_tic_unlock(nvc0->screen, entry);
1475 pipe_sampler_view_reference(&view, NULL);
1476 }
1477
/* Add or remove a Maxwell+ bindless image handle from the context's
 * resident list; the handle's low bits index the screen's TIC table. */
static void
gm107_make_image_handle_resident(struct pipe_context *pipe, uint64_t handle,
                                 unsigned access, bool resident)
{
   struct nvc0_context *nvc0 = nvc0_context(pipe);

   if (resident) {
      /* NOTE(review): calloc result is not checked before use; on OOM this
       * dereferences NULL. */
      struct nvc0_resident *res = calloc(1, sizeof(struct nvc0_resident));
      struct nv50_tic_entry *tic =
         nvc0->screen->tic.entries[handle & NVE4_TIC_ENTRY_INVALID];
      assert(tic);
      assert(tic->bindless);

      res->handle = handle;
      res->buf = nv04_resource(tic->pipe.texture);
      res->flags = (access & 3) << 8;
      /* Writable buffer images: mark the accessed range as valid data. */
      if (res->buf->base.target == PIPE_BUFFER &&
          access & PIPE_IMAGE_ACCESS_WRITE)
         util_range_add(&res->buf->base, &res->buf->valid_buffer_range,
                        tic->pipe.u.buf.offset,
                        tic->pipe.u.buf.offset + tic->pipe.u.buf.size);
      list_add(&res->list, &nvc0->img_head);
   } else {
      /* Find and drop the matching entry from the resident list. */
      list_for_each_entry_safe(struct nvc0_resident, pos, &nvc0->img_head, list) {
         if (pos->handle == handle) {
            list_del(&pos->list);
            free(pos);
            break;
         }
      }
   }
}
1510
1511 void
nvc0_init_bindless_functions(struct pipe_context * pipe)1512 nvc0_init_bindless_functions(struct pipe_context *pipe) {
1513 pipe->create_texture_handle = nve4_create_texture_handle;
1514 pipe->delete_texture_handle = nve4_delete_texture_handle;
1515 pipe->make_texture_handle_resident = nve4_make_texture_handle_resident;
1516
1517 if (nvc0_context(pipe)->screen->base.class_3d < GM107_3D_CLASS) {
1518 pipe->create_image_handle = nve4_create_image_handle;
1519 pipe->delete_image_handle = nve4_delete_image_handle;
1520 pipe->make_image_handle_resident = nve4_make_image_handle_resident;
1521 } else {
1522 pipe->create_image_handle = gm107_create_image_handle;
1523 pipe->delete_image_handle = gm107_delete_image_handle;
1524 pipe->make_image_handle_resident = gm107_make_image_handle_resident;
1525 }
1526 }
1527
1528
/* Gallium format -> GK104 image (surface) format encoding.
 * A zero entry means the format is unsupported for image access and makes
 * nve4_set_surface_info() emit the "invalid surface" block. */
static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = GK104_IMAGE_FORMAT_RGBA32_FLOAT,
   [PIPE_FORMAT_R32G32B32A32_SINT] = GK104_IMAGE_FORMAT_RGBA32_SINT,
   [PIPE_FORMAT_R32G32B32A32_UINT] = GK104_IMAGE_FORMAT_RGBA32_UINT,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = GK104_IMAGE_FORMAT_RGBA16_FLOAT,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = GK104_IMAGE_FORMAT_RGBA16_UNORM,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = GK104_IMAGE_FORMAT_RGBA16_SNORM,
   [PIPE_FORMAT_R16G16B16A16_SINT] = GK104_IMAGE_FORMAT_RGBA16_SINT,
   [PIPE_FORMAT_R16G16B16A16_UINT] = GK104_IMAGE_FORMAT_RGBA16_UINT,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = GK104_IMAGE_FORMAT_BGRA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = GK104_IMAGE_FORMAT_RGBA8_UNORM,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = GK104_IMAGE_FORMAT_RGBA8_SNORM,
   [PIPE_FORMAT_R8G8B8A8_SINT] = GK104_IMAGE_FORMAT_RGBA8_SINT,
   [PIPE_FORMAT_R8G8B8A8_UINT] = GK104_IMAGE_FORMAT_RGBA8_UINT,
   [PIPE_FORMAT_R11G11B10_FLOAT] = GK104_IMAGE_FORMAT_R11G11B10_FLOAT,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = GK104_IMAGE_FORMAT_RGB10_A2_UNORM,
   [PIPE_FORMAT_R10G10B10A2_UINT] = GK104_IMAGE_FORMAT_RGB10_A2_UINT,
   [PIPE_FORMAT_R32G32_FLOAT] = GK104_IMAGE_FORMAT_RG32_FLOAT,
   [PIPE_FORMAT_R32G32_SINT] = GK104_IMAGE_FORMAT_RG32_SINT,
   [PIPE_FORMAT_R32G32_UINT] = GK104_IMAGE_FORMAT_RG32_UINT,
   [PIPE_FORMAT_R16G16_FLOAT] = GK104_IMAGE_FORMAT_RG16_FLOAT,
   [PIPE_FORMAT_R16G16_UNORM] = GK104_IMAGE_FORMAT_RG16_UNORM,
   [PIPE_FORMAT_R16G16_SNORM] = GK104_IMAGE_FORMAT_RG16_SNORM,
   [PIPE_FORMAT_R16G16_SINT] = GK104_IMAGE_FORMAT_RG16_SINT,
   [PIPE_FORMAT_R16G16_UINT] = GK104_IMAGE_FORMAT_RG16_UINT,
   [PIPE_FORMAT_R8G8_UNORM] = GK104_IMAGE_FORMAT_RG8_UNORM,
   [PIPE_FORMAT_R8G8_SNORM] = GK104_IMAGE_FORMAT_RG8_SNORM,
   [PIPE_FORMAT_R8G8_SINT] = GK104_IMAGE_FORMAT_RG8_SINT,
   [PIPE_FORMAT_R8G8_UINT] = GK104_IMAGE_FORMAT_RG8_UINT,
   [PIPE_FORMAT_R32_FLOAT] = GK104_IMAGE_FORMAT_R32_FLOAT,
   [PIPE_FORMAT_R32_SINT] = GK104_IMAGE_FORMAT_R32_SINT,
   [PIPE_FORMAT_R32_UINT] = GK104_IMAGE_FORMAT_R32_UINT,
   [PIPE_FORMAT_R16_FLOAT] = GK104_IMAGE_FORMAT_R16_FLOAT,
   [PIPE_FORMAT_R16_UNORM] = GK104_IMAGE_FORMAT_R16_UNORM,
   [PIPE_FORMAT_R16_SNORM] = GK104_IMAGE_FORMAT_R16_SNORM,
   [PIPE_FORMAT_R16_SINT] = GK104_IMAGE_FORMAT_R16_SINT,
   [PIPE_FORMAT_R16_UINT] = GK104_IMAGE_FORMAT_R16_UINT,
   [PIPE_FORMAT_R8_UNORM] = GK104_IMAGE_FORMAT_R8_UNORM,
   [PIPE_FORMAT_R8_SNORM] = GK104_IMAGE_FORMAT_R8_SNORM,
   [PIPE_FORMAT_R8_SINT] = GK104_IMAGE_FORMAT_R8_SINT,
   [PIPE_FORMAT_R8_UINT] = GK104_IMAGE_FORMAT_R8_UINT,
};
1572
/* Auxiliary format description values for surface instructions.
 * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
 *
 * Bits 15:12 (log2 bytes/pixel) are consumed by nve4_set_surface_info();
 * bits 11:8 are OR'd into info[1] and the low byte into info[2] there --
 * the hardware meaning of unk8/unk22 is not documented here. */
static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,

   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,

   [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
   [PIPE_FORMAT_R32G32_SINT] = 0x3433,
   [PIPE_FORMAT_R32G32_UINT] = 0x3433,

   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
   [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24,
   [PIPE_FORMAT_B8G8R8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,

   [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
   [PIPE_FORMAT_R16G16_SINT] = 0x2524,
   [PIPE_FORMAT_R16G16_UINT] = 0x2524,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,

   [PIPE_FORMAT_R32_SINT] = 0x2024,
   [PIPE_FORMAT_R32_UINT] = 0x2024,
   [PIPE_FORMAT_R32_FLOAT] = 0x2024,

   [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
   [PIPE_FORMAT_R8G8_SINT] = 0x1615,
   [PIPE_FORMAT_R8G8_UINT] = 0x1615,

   [PIPE_FORMAT_R16_UNORM] = 0x1115,
   [PIPE_FORMAT_R16_SNORM] = 0x1115,
   [PIPE_FORMAT_R16_SINT] = 0x1115,
   [PIPE_FORMAT_R16_UINT] = 0x1115,
   [PIPE_FORMAT_R16_FLOAT] = 0x1115,

   [PIPE_FORMAT_R8_UNORM] = 0x0206,
   [PIPE_FORMAT_R8_SNORM] = 0x0206,
   [PIPE_FORMAT_R8_SINT] = 0x0206,
   [PIPE_FORMAT_R8_UINT] = 0x0206
};
1627
/* NOTE: These are hardcoded offsets for the shader library.
 * TODO: Automate them.
 *
 * Per-format entry points (relative to lib_code->start) of the SULDP
 * helpers; nve4_set_surface_info() uses the RGBA32_UINT entry for the
 * invalid-surface fallback. */
static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
{
   [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
   [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
   [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
   [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
   [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
   [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
   [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
   [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
   [PIPE_FORMAT_R32G32_SINT] = 0x468,
   [PIPE_FORMAT_R32G32_UINT] = 0x468,
   [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
   [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530,
   [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
   [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
   [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
   [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
   [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
   [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
   [PIPE_FORMAT_R16G16_UNORM] = 0x828,
   [PIPE_FORMAT_R16G16_SNORM] = 0x890,
   [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
   [PIPE_FORMAT_R16G16_UINT] = 0x948,
   [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
   [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
   [PIPE_FORMAT_R32_SINT] = 0xa30,
   [PIPE_FORMAT_R32_UINT] = 0xa30,
   [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
   [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
   [PIPE_FORMAT_R8G8_UINT] = 0xb48,
   [PIPE_FORMAT_R8G8_SINT] = 0xb98,
   [PIPE_FORMAT_R16_UNORM] = 0xbe8,
   [PIPE_FORMAT_R16_SNORM] = 0xc48,
   [PIPE_FORMAT_R16_SINT] = 0xca0,
   [PIPE_FORMAT_R16_UINT] = 0xce8,
   [PIPE_FORMAT_R16_FLOAT] = 0xd30,
   [PIPE_FORMAT_R8_UNORM] = 0xd88,
   [PIPE_FORMAT_R8_SNORM] = 0xde0,
   [PIPE_FORMAT_R8_SINT] = 0xe38,
   [PIPE_FORMAT_R8_UINT] = 0xe88,
   [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
};
1675