• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Collabora Ltd. and Red Hat Inc.
3  * SPDX-License-Identifier: MIT
4  */
5 #include "nvk_sampler.h"
6 
7 #include "nvk_device.h"
8 #include "nvk_entrypoints.h"
9 #include "nvk_physical_device.h"
10 
11 #include "vk_format.h"
12 #include "vk_sampler.h"
13 
14 #include "nouveau_context.h"
15 
16 #include "util/bitpack_helpers.h"
17 #include "util/format/format_utils.h"
18 #include "util/format_srgb.h"
19 
20 #include "cla097.h"
21 #include "clb197.h"
22 #include "cl9097tex.h"
23 #include "cla097tex.h"
24 #include "clb197tex.h"
25 #include "drf.h"
26 
27 ALWAYS_INLINE static void
__set_u32(uint32_t * o,uint32_t v,unsigned lo,unsigned hi)28 __set_u32(uint32_t *o, uint32_t v, unsigned lo, unsigned hi)
29 {
30    assert(lo <= hi && hi < 32);
31    *o |= util_bitpack_uint(v, lo % 32, hi % 32);
32 }
33 
34 #define FIXED_FRAC_BITS 8
35 
36 ALWAYS_INLINE static void
__set_ufixed(uint32_t * o,float v,unsigned lo,unsigned hi)37 __set_ufixed(uint32_t *o, float v, unsigned lo, unsigned hi)
38 {
39    assert(lo <= hi && hi < 32);
40    *o |= util_bitpack_ufixed_clamp(v, lo % 32, hi % 32, FIXED_FRAC_BITS);
41 }
42 
43 ALWAYS_INLINE static void
__set_sfixed(uint32_t * o,float v,unsigned lo,unsigned hi)44 __set_sfixed(uint32_t *o, float v, unsigned lo, unsigned hi)
45 {
46    assert(lo <= hi && hi < 32);
47    *o |= util_bitpack_sfixed_clamp(v, lo % 32, hi % 32, FIXED_FRAC_BITS);
48 }
49 
50 ALWAYS_INLINE static void
__set_bool(uint32_t * o,bool b,unsigned lo,unsigned hi)51 __set_bool(uint32_t *o, bool b, unsigned lo, unsigned hi)
52 {
53    assert(lo == hi && hi < 32);
54    *o |= util_bitpack_uint(b, lo % 32, hi % 32);
55 }
56 
/* NOTE(review): MW() expands to its argument unchanged; presumably the
 * class headers wrap their field bit ranges in it -- confirm against the
 * cl*tex.h headers.
 */
#define MW(x) x

/*
 * Field setters for the sampler header.
 *
 *   o      array of header dwords
 *   NV     class prefix of the field definition (e.g. NV9097)
 *   i      index of the header dword that holds the field
 *   FIELD  field name, pasted into NV##_TEXSAMP##i##_##FIELD
 *
 * DRF_LO()/DRF_HI() supply the two bit positions of the field, which are
 * forwarded to the matching __set_*() helper together with &(o)[i].
 */

/* Unsigned integer field. */
#define SAMP_SET_U(o, NV, i, FIELD, val)                 \
   __set_u32(&(o)[i], (val),                             \
             DRF_LO(NV##_TEXSAMP##i##_##FIELD),          \
             DRF_HI(NV##_TEXSAMP##i##_##FIELD))

/* Unsigned fixed-point field, converted from a float. */
#define SAMP_SET_UF(o, NV, i, FIELD, val)                \
   __set_ufixed(&(o)[i], (val),                          \
                DRF_LO(NV##_TEXSAMP##i##_##FIELD),       \
                DRF_HI(NV##_TEXSAMP##i##_##FIELD))

/* Signed fixed-point field, converted from a float. */
#define SAMP_SET_SF(o, NV, i, FIELD, val)                \
   __set_sfixed(&(o)[i], (val),                          \
                DRF_LO(NV##_TEXSAMP##i##_##FIELD),       \
                DRF_HI(NV##_TEXSAMP##i##_##FIELD))

/* Single-bit boolean field. */
#define SAMP_SET_B(o, NV, i, FIELD, b)                   \
   __set_bool(&(o)[i], (b),                              \
              DRF_LO(NV##_TEXSAMP##i##_##FIELD),         \
              DRF_HI(NV##_TEXSAMP##i##_##FIELD))

/* Enumerated field: E names one of the field's value constants,
 * NV##_TEXSAMP##i##_##FIELD##_##E.
 */
#define SAMP_SET_E(o, NV, i, FIELD, E)                   \
   SAMP_SET_U((o), NV, i, FIELD, NV##_TEXSAMP##i##_##FIELD##_##E)
77 
78 static inline uint32_t
vk_to_9097_address_mode(VkSamplerAddressMode addr_mode)79 vk_to_9097_address_mode(VkSamplerAddressMode addr_mode)
80 {
81 #define MODE(VK, NV) \
82    [VK_SAMPLER_ADDRESS_MODE_##VK] = NV9097_TEXSAMP0_ADDRESS_U_##NV
83    static const uint8_t vk_to_9097[] = {
84       MODE(REPEAT,               WRAP),
85       MODE(MIRRORED_REPEAT,      MIRROR),
86       MODE(CLAMP_TO_EDGE,        CLAMP_TO_EDGE),
87       MODE(CLAMP_TO_BORDER,      BORDER),
88       MODE(MIRROR_CLAMP_TO_EDGE, MIRROR_ONCE_CLAMP_TO_EDGE),
89    };
90 #undef MODE
91 
92    assert(addr_mode < ARRAY_SIZE(vk_to_9097));
93    return vk_to_9097[addr_mode];
94 }
95 
96 static uint32_t
vk_to_9097_texsamp_compare_op(VkCompareOp op)97 vk_to_9097_texsamp_compare_op(VkCompareOp op)
98 {
99 #define OP(VK, NV) \
100    [VK_COMPARE_OP_##VK] = NV9097_TEXSAMP0_DEPTH_COMPARE_FUNC_##NV
101    ASSERTED static const uint8_t vk_to_9097[] = {
102       OP(NEVER,            ZC_NEVER),
103       OP(LESS,             ZC_LESS),
104       OP(EQUAL,            ZC_EQUAL),
105       OP(LESS_OR_EQUAL,    ZC_LEQUAL),
106       OP(GREATER,          ZC_GREATER),
107       OP(NOT_EQUAL,        ZC_NOTEQUAL),
108       OP(GREATER_OR_EQUAL, ZC_GEQUAL),
109       OP(ALWAYS,           ZC_ALWAYS),
110    };
111 #undef OP
112 
113    assert(op < ARRAY_SIZE(vk_to_9097));
114    assert(op == vk_to_9097[op]);
115 
116    return op;
117 }
118 
119 static uint32_t
vk_to_9097_max_anisotropy(float max_anisotropy)120 vk_to_9097_max_anisotropy(float max_anisotropy)
121 {
122    if (max_anisotropy >= 16)
123       return NV9097_TEXSAMP0_MAX_ANISOTROPY_ANISO_16_TO_1;
124 
125    if (max_anisotropy >= 12)
126       return NV9097_TEXSAMP0_MAX_ANISOTROPY_ANISO_12_TO_1;
127 
128    uint32_t aniso_u32 = MAX2(0.0f, max_anisotropy);
129    return aniso_u32 >> 1;
130 }
131 
/* Picks the value for the TEXSAMP1 TRILIN_OPT field from the maximum
 * anisotropy ratio:
 *
 *    ratio <  2        -> 0
 *    2 <= ratio <  4   -> 4
 *    4 <= ratio < 12   -> 6
 *    ratio >= 12       -> 0
 *
 * It is unclear whether this is actually wanted; it matches what nouveau
 * does.
 */
static uint32_t
vk_to_9097_trilin_opt(float max_anisotropy)
{
   uint32_t trilin_opt = 0;

   /* Each later threshold overrides the result of the earlier ones. */
   if (max_anisotropy >= 2)
      trilin_opt = 4;

   if (max_anisotropy >= 4)
      trilin_opt = 6;

   if (max_anisotropy >= 12)
      trilin_opt = 0;

   return trilin_opt;
}
147 
148 static void
nvk_sampler_fill_header(const struct nvk_physical_device * pdev,const struct VkSamplerCreateInfo * info,const struct vk_sampler * vk_sampler,uint32_t * samp)149 nvk_sampler_fill_header(const struct nvk_physical_device *pdev,
150                         const struct VkSamplerCreateInfo *info,
151                         const struct vk_sampler *vk_sampler,
152                         uint32_t *samp)
153 {
154    SAMP_SET_U(samp, NV9097, 0, ADDRESS_U,
155               vk_to_9097_address_mode(info->addressModeU));
156    SAMP_SET_U(samp, NV9097, 0, ADDRESS_V,
157               vk_to_9097_address_mode(info->addressModeV));
158    SAMP_SET_U(samp, NV9097, 0, ADDRESS_P,
159               vk_to_9097_address_mode(info->addressModeW));
160 
161    if (info->compareEnable) {
162       SAMP_SET_B(samp, NV9097, 0, DEPTH_COMPARE, true);
163       SAMP_SET_U(samp, NV9097, 0, DEPTH_COMPARE_FUNC,
164                  vk_to_9097_texsamp_compare_op(info->compareOp));
165    }
166 
167    SAMP_SET_B(samp, NV9097, 0, S_R_G_B_CONVERSION, true);
168    SAMP_SET_E(samp, NV9097, 0, FONT_FILTER_WIDTH, SIZE_2);
169    SAMP_SET_E(samp, NV9097, 0, FONT_FILTER_HEIGHT, SIZE_2);
170 
171    if (info->anisotropyEnable) {
172       SAMP_SET_U(samp, NV9097, 0, MAX_ANISOTROPY,
173                  vk_to_9097_max_anisotropy(info->maxAnisotropy));
174    }
175 
176    switch (info->magFilter) {
177    case VK_FILTER_NEAREST:
178       SAMP_SET_E(samp, NV9097, 1, MAG_FILTER, MAG_POINT);
179       break;
180    case VK_FILTER_LINEAR:
181       SAMP_SET_E(samp, NV9097, 1, MAG_FILTER, MAG_LINEAR);
182       break;
183    default:
184       unreachable("Invalid filter");
185    }
186 
187    switch (info->minFilter) {
188    case VK_FILTER_NEAREST:
189       SAMP_SET_E(samp, NV9097, 1, MIN_FILTER, MIN_POINT);
190       break;
191    case VK_FILTER_LINEAR:
192       if (info->anisotropyEnable)
193          SAMP_SET_E(samp, NV9097, 1, MIN_FILTER, MIN_ANISO);
194       else
195          SAMP_SET_E(samp, NV9097, 1, MIN_FILTER, MIN_LINEAR);
196       break;
197    default:
198       unreachable("Invalid filter");
199    }
200 
201    switch (info->mipmapMode) {
202    case VK_SAMPLER_MIPMAP_MODE_NEAREST:
203       SAMP_SET_E(samp, NV9097, 1, MIP_FILTER, MIP_POINT);
204       break;
205    case VK_SAMPLER_MIPMAP_MODE_LINEAR:
206       SAMP_SET_E(samp, NV9097, 1, MIP_FILTER, MIP_LINEAR);
207       break;
208    default:
209       unreachable("Invalid mipmap mode");
210    }
211 
212    assert(pdev->info.cls_eng3d >= KEPLER_A);
213    if (info->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT) {
214       SAMP_SET_E(samp, NVA097, 1, CUBEMAP_INTERFACE_FILTERING, USE_WRAP);
215    } else {
216       SAMP_SET_E(samp, NVA097, 1, CUBEMAP_INTERFACE_FILTERING, AUTO_SPAN_SEAM);
217    }
218 
219    if (pdev->info.cls_eng3d >= MAXWELL_B) {
220       switch (vk_sampler->reduction_mode) {
221       case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE:
222          SAMP_SET_E(samp, NVB197, 1, REDUCTION_FILTER, RED_NONE);
223          break;
224       case VK_SAMPLER_REDUCTION_MODE_MIN:
225          SAMP_SET_E(samp, NVB197, 1, REDUCTION_FILTER, RED_MINIMUM);
226          break;
227       case VK_SAMPLER_REDUCTION_MODE_MAX:
228          SAMP_SET_E(samp, NVB197, 1, REDUCTION_FILTER, RED_MAXIMUM);
229          break;
230       default:
231          unreachable("Invalid reduction mode");
232       }
233    }
234 
235    SAMP_SET_SF(samp, NV9097, 1, MIP_LOD_BIAS, info->mipLodBias);
236 
237    assert(pdev->info.cls_eng3d >= KEPLER_A);
238    if (info->unnormalizedCoordinates) {
239       SAMP_SET_E(samp, NVA097, 1, FLOAT_COORD_NORMALIZATION,
240                                   FORCE_UNNORMALIZED_COORDS);
241    } else {
242       SAMP_SET_E(samp, NVA097, 1, FLOAT_COORD_NORMALIZATION,
243                                   USE_HEADER_SETTING);
244    }
245    SAMP_SET_U(samp, NV9097, 1, TRILIN_OPT,
246               vk_to_9097_trilin_opt(info->maxAnisotropy));
247 
248    SAMP_SET_UF(samp, NV9097, 2, MIN_LOD_CLAMP, info->minLod);
249    SAMP_SET_UF(samp, NV9097, 2, MAX_LOD_CLAMP, info->maxLod);
250 
251    VkClearColorValue bc = vk_sampler->border_color_value;
252    uint8_t bc_srgb[3];
253 
254    const VkSamplerBorderColorComponentMappingCreateInfoEXT *swiz_info =
255       vk_find_struct_const(info->pNext,
256                            SAMPLER_BORDER_COLOR_COMPONENT_MAPPING_CREATE_INFO_EXT);
257    if (swiz_info) {
258       if (swiz_info->srgb) {
259          for (uint32_t i = 0; i < 3; i++)
260             bc.float32[i] = util_format_linear_to_srgb_float(bc.float32[i]);
261       }
262 
263       const bool is_int = vk_border_color_is_int(info->borderColor);
264       bc = vk_swizzle_color_value(bc, swiz_info->components, is_int);
265 
266       for (uint32_t i = 0; i < 3; i++)
267          bc_srgb[i] = _mesa_float_to_unorm(bc.float32[i], 8);
268    } else {
269       /* Otherwise, we can assume no swizzle or that the border color is
270        * transparent black or opaque white and there's nothing to do but
271        * convert the (unswizzled) border color to sRGB.
272        */
273       for (unsigned i = 0; i < 3; i++)
274          bc_srgb[i] = util_format_linear_float_to_srgb_8unorm(bc.float32[i]);
275    }
276 
277    SAMP_SET_U(samp, NV9097, 2, S_R_G_B_BORDER_COLOR_R, bc_srgb[0]);
278    SAMP_SET_U(samp, NV9097, 3, S_R_G_B_BORDER_COLOR_G, bc_srgb[1]);
279    SAMP_SET_U(samp, NV9097, 3, S_R_G_B_BORDER_COLOR_B, bc_srgb[2]);
280 
281    SAMP_SET_U(samp, NV9097, 4, BORDER_COLOR_R, bc.uint32[0]);
282    SAMP_SET_U(samp, NV9097, 5, BORDER_COLOR_G, bc.uint32[1]);
283    SAMP_SET_U(samp, NV9097, 6, BORDER_COLOR_B, bc.uint32[2]);
284    SAMP_SET_U(samp, NV9097, 7, BORDER_COLOR_A, bc.uint32[3]);
285 }
286 
287 VKAPI_ATTR VkResult VKAPI_CALL
nvk_CreateSampler(VkDevice device,const VkSamplerCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSampler * pSampler)288 nvk_CreateSampler(VkDevice device,
289                   const VkSamplerCreateInfo *pCreateInfo,
290                   const VkAllocationCallbacks *pAllocator,
291                   VkSampler *pSampler)
292 {
293    VK_FROM_HANDLE(nvk_device, dev, device);
294    struct nvk_sampler *sampler;
295    VkResult result;
296 
297    sampler = vk_sampler_create(&dev->vk, pCreateInfo,
298                                pAllocator, sizeof(*sampler));
299    if (!sampler)
300       return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
301 
302    uint32_t samp[8] = {};
303    sampler->plane_count = 1;
304    nvk_sampler_fill_header(dev->pdev, pCreateInfo, &sampler->vk, samp);
305    result = nvk_descriptor_table_add(dev, &dev->samplers,
306                                      samp, sizeof(samp),
307                                      &sampler->planes[0].desc_index);
308    if (result != VK_SUCCESS) {
309       vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk);
310       return result;
311    }
312 
313    /* In order to support CONVERSION_SEPARATE_RECONSTRUCTION_FILTER_BIT, we
314     * need multiple sampler planes: at minimum we will need one for luminance
315     * (the default), and one for chroma.  Each sampler plane needs its own
316     * sampler table entry.  However, sampler table entries are very rare on
317     * NVIDIA; we only have 4096 entries for the whole VkDevice, and each plane
318     * would burn one of those. So we make sure to allocate only the minimum
319     * amount that we actually need (i.e., either 1 or 2), and then just copy
320     * the last sampler plane out as far as we need to fill the number of image
321     * planes.
322     */
323 
324    if (sampler->vk.ycbcr_conversion) {
325       const VkFilter chroma_filter =
326          sampler->vk.ycbcr_conversion->state.chroma_filter;
327       if (pCreateInfo->magFilter != chroma_filter ||
328           pCreateInfo->minFilter != chroma_filter) {
329          VkSamplerCreateInfo plane2_info = *pCreateInfo;
330          plane2_info.magFilter = chroma_filter;
331          plane2_info.minFilter = chroma_filter;
332 
333          memset(samp, 0, sizeof(samp));
334          sampler->plane_count = 2;
335          nvk_sampler_fill_header(dev->pdev, &plane2_info, &sampler->vk, samp);
336          result = nvk_descriptor_table_add(dev, &dev->samplers,
337                                            samp, sizeof(samp),
338                                            &sampler->planes[1].desc_index);
339          if (result != VK_SUCCESS) {
340             nvk_descriptor_table_remove(dev, &dev->samplers,
341                                         sampler->planes[0].desc_index);
342             vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk);
343             return result;
344          }
345       }
346    }
347 
348    *pSampler = nvk_sampler_to_handle(sampler);
349 
350    return VK_SUCCESS;
351 }
352 
353 VKAPI_ATTR void VKAPI_CALL
nvk_DestroySampler(VkDevice device,VkSampler _sampler,const VkAllocationCallbacks * pAllocator)354 nvk_DestroySampler(VkDevice device,
355                    VkSampler _sampler,
356                    const VkAllocationCallbacks *pAllocator)
357 {
358    VK_FROM_HANDLE(nvk_device, dev, device);
359    VK_FROM_HANDLE(nvk_sampler, sampler, _sampler);
360 
361    if (!sampler)
362       return;
363 
364    for (uint8_t plane = 0; plane < sampler->plane_count; plane++) {
365       nvk_descriptor_table_remove(dev, &dev->samplers,
366                                   sampler->planes[plane].desc_index);
367    }
368 
369    vk_sampler_destroy(&dev->vk, pAllocator, &sampler->vk);
370 }
371