• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright © 2024 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3 
4 #![allow(unused_macros)]
5 
6 extern crate bitview;
7 extern crate nvidia_headers;
8 
9 use bitview::*;
10 use nil_rs_bindings::*;
11 use nvidia_headers::classes::cl9097::tex as cl9097;
12 use nvidia_headers::classes::cl9097::FERMI_A;
13 use nvidia_headers::classes::clb097::tex as clb097;
14 use nvidia_headers::classes::clb097::MAXWELL_A;
15 use nvidia_headers::classes::clc097::tex as clc097;
16 use nvidia_headers::classes::clc097::PASCAL_A;
17 use nvidia_headers::classes::clc397::VOLTA_A;
18 use paste::paste;
19 use std::ops::Range;
20 
21 use crate::extent::{units, Extent4D};
22 use crate::format::Format;
23 use crate::image::Image;
24 use crate::image::ImageDim;
25 use crate::image::SampleLayout;
26 use crate::image::View;
27 use crate::image::ViewType;
28 use crate::tiling::GOBType;
29 
/// Sets the field `$field` of class module `$class` on `$header` to the
/// constant named `<$field>_<$variant>` from that same module.
macro_rules! set_enum {
    ($header:expr, $class:ident, $field:ident, $variant:ident) => {
        paste! {
            $header.set_field($class::$field, $class::[<$field _ $variant>])
        }
    };
}
37 
38 trait SetUFixed {
set_ufixed(&mut self, range: Range<usize>, val: f32)39     fn set_ufixed(&mut self, range: Range<usize>, val: f32);
40 }
41 
42 const FIXED_FRAC_BITS: u32 = 8;
43 
44 impl<T: SetFieldU64> SetUFixed for T {
set_ufixed(&mut self, range: Range<usize>, val: f32)45     fn set_ufixed(&mut self, range: Range<usize>, val: f32) {
46         assert!(range.len() >= FIXED_FRAC_BITS as usize);
47         let scaled = val * ((1 << FIXED_FRAC_BITS) as f32);
48         let scaled_max = ((1 << range.len()) - 1) as f32;
49         let u_val = scaled.clamp(0.0, scaled_max).round() as u32;
50         self.set_field(range, u_val);
51     }
52 }
53 
nv9097_th_v2_source( fmt: &nil_tic_format, swizzle: pipe_swizzle, is_int: bool, ) -> u3254 fn nv9097_th_v2_source(
55     fmt: &nil_tic_format,
56     swizzle: pipe_swizzle,
57     is_int: bool,
58 ) -> u32 {
59     match swizzle {
60         PIPE_SWIZZLE_X => fmt.src_x(),
61         PIPE_SWIZZLE_Y => fmt.src_y(),
62         PIPE_SWIZZLE_Z => fmt.src_z(),
63         PIPE_SWIZZLE_W => fmt.src_w(),
64         PIPE_SWIZZLE_0 => cl9097::TEXHEADV2_X_SOURCE_IN_ZERO,
65         PIPE_SWIZZLE_1 => {
66             if is_int {
67                 cl9097::TEXHEADV2_X_SOURCE_IN_ONE_INT
68             } else {
69                 cl9097::TEXHEADV2_X_SOURCE_IN_ONE_FLOAT
70             }
71         }
72         other => panic!("Invalid component swizzle {}", other),
73     }
74 }
75 
nvb097_th_bl_source( fmt: &nil_tic_format, swizzle: pipe_swizzle, is_int: bool, ) -> u3276 fn nvb097_th_bl_source(
77     fmt: &nil_tic_format,
78     swizzle: pipe_swizzle,
79     is_int: bool,
80 ) -> u32 {
81     match swizzle {
82         PIPE_SWIZZLE_X => fmt.src_x(),
83         PIPE_SWIZZLE_Y => fmt.src_y(),
84         PIPE_SWIZZLE_Z => fmt.src_z(),
85         PIPE_SWIZZLE_W => fmt.src_w(),
86         PIPE_SWIZZLE_0 => clb097::TEXHEADV2_X_SOURCE_IN_ZERO,
87         PIPE_SWIZZLE_1 => {
88             if is_int {
89                 clb097::TEXHEADV2_X_SOURCE_IN_ONE_INT
90             } else {
91                 clb097::TEXHEADV2_X_SOURCE_IN_ONE_FLOAT
92             }
93         }
94         other => panic!("Invalid component swizzle {}", other),
95     }
96 }
97 
98 type THBitView<'a> = BitMutView<'a, [u32; 8]>;
99 
nv9097_set_th_v2_0<'a>( th: &mut THBitView<'a>, format: &Format, swizzle: [nil_rs_bindings::pipe_swizzle; 4], )100 fn nv9097_set_th_v2_0<'a>(
101     th: &mut THBitView<'a>,
102     format: &Format,
103     swizzle: [nil_rs_bindings::pipe_swizzle; 4],
104 ) {
105     let fmt = &format.info().tic;
106     let is_int = format.is_integer();
107     let source = [
108         nv9097_th_v2_source(fmt, swizzle[0], is_int),
109         nv9097_th_v2_source(fmt, swizzle[1], is_int),
110         nv9097_th_v2_source(fmt, swizzle[2], is_int),
111         nv9097_th_v2_source(fmt, swizzle[3], is_int),
112     ];
113 
114     th.set_field(cl9097::TEXHEADV2_COMPONENT_SIZES, fmt.comp_sizes());
115     th.set_field(cl9097::TEXHEADV2_R_DATA_TYPE, fmt.type_r());
116     th.set_field(cl9097::TEXHEADV2_G_DATA_TYPE, fmt.type_g());
117     th.set_field(cl9097::TEXHEADV2_B_DATA_TYPE, fmt.type_b());
118     th.set_field(cl9097::TEXHEADV2_A_DATA_TYPE, fmt.type_a());
119     th.set_field(cl9097::TEXHEADV2_X_SOURCE, source[0]);
120     th.set_field(cl9097::TEXHEADV2_Y_SOURCE, source[1]);
121     th.set_field(cl9097::TEXHEADV2_Z_SOURCE, source[2]);
122     th.set_field(cl9097::TEXHEADV2_W_SOURCE, source[3]);
123 }
124 
nvb097_set_th_bl_0<'a>( th: &mut THBitView<'a>, format: &Format, swizzle: [nil_rs_bindings::pipe_swizzle; 4], )125 fn nvb097_set_th_bl_0<'a>(
126     th: &mut THBitView<'a>,
127     format: &Format,
128     swizzle: [nil_rs_bindings::pipe_swizzle; 4],
129 ) {
130     let fmt = &format.info().tic;
131     let is_int = format.is_integer();
132     let source = [
133         nvb097_th_bl_source(fmt, swizzle[0], is_int),
134         nvb097_th_bl_source(fmt, swizzle[1], is_int),
135         nvb097_th_bl_source(fmt, swizzle[2], is_int),
136         nvb097_th_bl_source(fmt, swizzle[3], is_int),
137     ];
138 
139     th.set_field(clb097::TEXHEAD_BL_COMPONENTS, fmt.comp_sizes());
140     th.set_field(clb097::TEXHEAD_BL_R_DATA_TYPE, fmt.type_r());
141     th.set_field(clb097::TEXHEAD_BL_G_DATA_TYPE, fmt.type_g());
142     th.set_field(clb097::TEXHEAD_BL_B_DATA_TYPE, fmt.type_b());
143     th.set_field(clb097::TEXHEAD_BL_A_DATA_TYPE, fmt.type_a());
144     th.set_field(clb097::TEXHEAD_BL_X_SOURCE, source[0]);
145     th.set_field(clb097::TEXHEAD_BL_Y_SOURCE, source[1]);
146     th.set_field(clb097::TEXHEAD_BL_Z_SOURCE, source[2]);
147     th.set_field(clb097::TEXHEAD_BL_W_SOURCE, source[3]);
148 }
149 
pipe_to_nv_texture_type(ty: ViewType) -> u32150 fn pipe_to_nv_texture_type(ty: ViewType) -> u32 {
151     match ty {
152         ViewType::_1D => clb097::TEXHEAD_BL_TEXTURE_TYPE_ONE_D,
153         ViewType::_2D => clb097::TEXHEAD_BL_TEXTURE_TYPE_TWO_D,
154         ViewType::_3D | ViewType::_3DSliced => {
155             clb097::TEXHEAD_BL_TEXTURE_TYPE_THREE_D
156         }
157         ViewType::Cube => clb097::TEXHEAD_BL_TEXTURE_TYPE_CUBEMAP,
158         ViewType::_1DArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_ONE_D_ARRAY,
159         ViewType::_2DArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_TWO_D_ARRAY,
160         ViewType::CubeArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_CUBEMAP_ARRAY,
161     }
162 }
163 
nil_rs_to_nv9097_multi_sample_count(sample_layout: SampleLayout) -> u32164 fn nil_rs_to_nv9097_multi_sample_count(sample_layout: SampleLayout) -> u32 {
165     match sample_layout {
166         SampleLayout::_1x1 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_1X1,
167         SampleLayout::_2x1 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_2X1,
168         SampleLayout::_2x1D3d => {
169             cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_2X1_D3D
170         }
171         SampleLayout::_2x2 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_2X2,
172         SampleLayout::_4x2 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_4X2,
173         SampleLayout::_4x2D3d => {
174             cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_4X2_D3D
175         }
176         SampleLayout::_4x4 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_4X4,
177         SampleLayout::Invalid => panic!("Invalid sample layout"),
178     }
179 }
180 
nil_rs_to_nvb097_multi_sample_count(sample_layout: SampleLayout) -> u32181 fn nil_rs_to_nvb097_multi_sample_count(sample_layout: SampleLayout) -> u32 {
182     match sample_layout {
183         SampleLayout::_1x1 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_1X1,
184         SampleLayout::_2x1 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_2X1,
185         SampleLayout::_2x1D3d => {
186             clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_2X1_D3D
187         }
188         SampleLayout::_2x2 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_2X2,
189         SampleLayout::_4x2 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_4X2,
190         SampleLayout::_4x2D3d => {
191             clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_4X2_D3D
192         }
193         SampleLayout::_4x4 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_4X4,
194         SampleLayout::Invalid => panic!("Invalid sample layout"),
195     }
196 }
197 
nil_rs_max_mip_level(image: &Image, view: &View) -> u32198 fn nil_rs_max_mip_level(image: &Image, view: &View) -> u32 {
199     if view.view_type != ViewType::_3D
200         && view.array_len == 1
201         && view.base_level == 0
202         && view.num_levels == 1
203     {
204         // The Unnormalized coordinates bit in the sampler gets ignored if the
205         // referenced image has more than one miplevel.  Fortunately, Vulkan has
206         // restrictions requiring the view to be a single-layer single-LOD view
207         // in order to use nonnormalizedCoordinates = VK_TRUE in the sampler.
208         // From the Vulkan 1.3.255 spec:
209         //
210         //  "When unnormalizedCoordinates is VK_TRUE, images the sampler is
211         //  used with in the shader have the following requirements:
212         //
213         //      - The viewType must be either VK_IMAGE_VIEW_TYPE_1D or
214         //        VK_IMAGE_VIEW_TYPE_2D.
215         //      - The image view must have a single layer and a single mip
216         //        level."
217         //
218         // Under these conditions, the view is simply LOD 0 of a single array
219         // slice so we don't need to care about aray stride between slices so
220         // it's safe to set the number of miplevels to 0 regardless of how many
221         // the image actually has.
222         0
223     } else {
224         image.num_levels - 1
225     }
226 }
227 
normalize_extent(image: &Image, view: &View) -> Extent4D<units::Pixels>228 fn normalize_extent(image: &Image, view: &View) -> Extent4D<units::Pixels> {
229     let mut extent = image.extent_px;
230     match view.view_type {
231         ViewType::_1D
232         | ViewType::_2D
233         | ViewType::_1DArray
234         | ViewType::_2DArray => {
235             assert!(image.extent_px.depth == 1);
236             extent.depth = view.array_len;
237         }
238         ViewType::_3D => {
239             assert!(image.dim == ImageDim::_3D);
240             extent.depth = image.extent_px.depth;
241         }
242         ViewType::Cube | ViewType::CubeArray => {
243             assert!(image.dim == ImageDim::_2D);
244             assert!(view.array_len % 6 == 0);
245             extent.depth = view.array_len / 6;
246         }
247         ViewType::_3DSliced => {
248             assert!(image.dim == ImageDim::_3D);
249             extent.depth = view.array_len;
250         }
251     }
252     extent.array_len = 0;
253 
254     extent
255 }
256 
nv9097_fill_tic( image: &Image, view: &View, base_address: u64, desc_out: &mut [u32; 8], )257 fn nv9097_fill_tic(
258     image: &Image,
259     view: &View,
260     base_address: u64,
261     desc_out: &mut [u32; 8],
262 ) {
263     assert!(image.format.el_size_B() == view.format.el_size_B());
264     assert!(view.base_level + view.num_levels <= image.num_levels);
265     assert!(
266         view.base_array_layer + view.array_len <= image.extent_px.array_len
267     );
268 
269     *desc_out = [0u32; 8];
270     let mut th = BitMutView::new(desc_out);
271 
272     th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
273     nv9097_set_th_v2_0(&mut th, &view.format, view.swizzle);
274 
275     // There's no base layer field in the texture header
276     let layer_address = base_address
277         + u64::from(view.base_array_layer)
278         + u64::from(image.array_stride_B);
279 
280     th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, layer_address as u32);
281     th.set_field(cl9097::TEXHEADV2_OFFSET_UPPER, (layer_address >> 32) as u32);
282 
283     let tiling = &image.levels[0].tiling;
284 
285     if tiling.is_tiled() {
286         set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, BLOCKLINEAR);
287 
288         assert!(tiling.gob_type == GOBType::Fermi8);
289         assert!(tiling.x_log2 == 0);
290         set_enum!(th, cl9097, TEXHEADV2_GOBS_PER_BLOCK_WIDTH, ONE_GOB);
291         th.set_field(cl9097::TEXHEADV2_GOBS_PER_BLOCK_HEIGHT, tiling.y_log2);
292         th.set_field(cl9097::TEXHEADV2_GOBS_PER_BLOCK_DEPTH, tiling.z_log2);
293 
294         let nv_tex_type = pipe_to_nv_texture_type(view.view_type);
295         th.set_field(cl9097::TEXHEADV2_TEXTURE_TYPE, nv_tex_type);
296     } else {
297         set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, PITCH);
298 
299         let pitch = image.levels[0].row_stride_B;
300         th.set_field(cl9097::TEXHEADV2_PITCH, pitch);
301 
302         assert!(
303             view.view_type == ViewType::_2D
304                 || view.view_type == ViewType::_2DArray
305         );
306         assert!(image.sample_layout == SampleLayout::_1x1);
307         assert!(view.num_levels == 1);
308         set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, TWO_D_NO_MIPMAP);
309     }
310 
311     set_enum!(th, cl9097, TEXHEADV2_LOD_ANISO_QUALITY, LOD_QUALITY_HIGH);
312     set_enum!(th, cl9097, TEXHEADV2_LOD_ISO_QUALITY, LOD_QUALITY_HIGH);
313     set_enum!(
314         th,
315         cl9097,
316         TEXHEADV2_ANISO_COARSE_SPREAD_MODIFIER,
317         SPREAD_MODIFIER_NONE
318     );
319 
320     let extent = normalize_extent(image, view);
321     th.set_field(cl9097::TEXHEADV2_WIDTH, extent.width);
322     th.set_field(cl9097::TEXHEADV2_HEIGHT, extent.height);
323     th.set_field(cl9097::TEXHEADV2_DEPTH, extent.depth);
324 
325     let max_mip_level = nil_rs_max_mip_level(image, view);
326     th.set_field(cl9097::TEXHEADV2_MAX_MIP_LEVEL, max_mip_level);
327 
328     th.set_field(cl9097::TEXHEADV2_S_R_G_B_CONVERSION, view.format.is_srgb());
329 
330     set_enum!(th, cl9097, TEXHEADV2_BORDER_SOURCE, BORDER_COLOR);
331 
332     // In the sampler, the two options for FLOAT_COORD_NORMALIZATION are:
333     //
334     // - FORCE_UNNORMALIZED_COORDS
335     // - USE_HEADER_SETTING
336     //
337     // So we set it to normalized in the header and let the sampler select that
338     // or force non-normalized.
339     th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
340 
341     set_enum!(
342         th,
343         cl9097,
344         TEXHEADV2_ANISO_FINE_SPREAD_FUNC,
345         SPREAD_FUNC_TWO
346     );
347     set_enum!(
348         th,
349         cl9097,
350         TEXHEADV2_ANISO_COARSE_SPREAD_FUNC,
351         SPREAD_FUNC_ONE
352     );
353 
354     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, view.base_level);
355     th.set_field(
356         cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL,
357         view.num_levels + view.base_level - 1,
358     );
359 
360     let msc = nil_rs_to_nv9097_multi_sample_count(image.sample_layout);
361     th.set_field(cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT, msc);
362 
363     let min_lod_clamp = view.min_lod_clamp - (view.base_level as f32);
364     th.set_ufixed(cl9097::TEXHEADV2_MIN_LOD_CLAMP, min_lod_clamp);
365 }
366 
nvb097_fill_tic( dev: &nil_rs_bindings::nv_device_info, image: &Image, view: &View, base_address: u64, desc_out: &mut [u32; 8], )367 fn nvb097_fill_tic(
368     dev: &nil_rs_bindings::nv_device_info,
369     image: &Image,
370     view: &View,
371     base_address: u64,
372     desc_out: &mut [u32; 8],
373 ) {
374     assert!(image.format.el_size_B() == view.format.el_size_B());
375     assert!(view.base_level + view.num_levels <= image.num_levels);
376 
377     *desc_out = [0u32; 8];
378     let mut th = BitMutView::new(desc_out);
379 
380     nvb097_set_th_bl_0(&mut th, &view.format, view.swizzle);
381 
382     let tiling = &image.levels[0].tiling;
383 
384     // There's no base layer field in the texture header
385     let mut layer_address = base_address;
386     if view.view_type == ViewType::_3DSliced {
387         assert!(view.num_levels == 1);
388         assert!(
389             view.base_array_layer + view.array_len <= image.extent_px.depth
390         );
391 
392         layer_address +=
393             image.level_z_offset_B(view.base_level, view.base_array_layer);
394     } else {
395         assert!(
396             view.base_array_layer + view.array_len <= image.extent_px.array_len
397         );
398         layer_address +=
399             u64::from(view.base_array_layer) * u64::from(image.array_stride_B);
400     }
401 
402     if tiling.is_tiled() {
403         set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_BLOCKLINEAR);
404 
405         let addr = BitView::new(&layer_address);
406         assert!(addr.get_bit_range_u64(0..9) == 0);
407         th.set_field(
408             clb097::TEXHEAD_BL_ADDRESS_BITS31TO9,
409             addr.get_bit_range_u64(9..32),
410         );
411         th.set_field(
412             clb097::TEXHEAD_BL_ADDRESS_BITS47TO32,
413             addr.get_bit_range_u64(32..48),
414         );
415         assert!(addr.get_bit_range_u64(48..64) == 0);
416 
417         assert!(tiling.gob_type == GOBType::Fermi8);
418 
419         set_enum!(th, clb097, TEXHEAD_BL_GOBS_PER_BLOCK_WIDTH, ONE_GOB);
420         th.set_field(clb097::TEXHEAD_BL_GOBS_PER_BLOCK_HEIGHT, tiling.y_log2);
421         th.set_field(clb097::TEXHEAD_BL_GOBS_PER_BLOCK_DEPTH, tiling.z_log2);
422         th.set_field(clb097::TEXHEAD_BL_TILE_WIDTH_IN_GOBS, tiling.x_log2);
423 
424         let nv_text_type = pipe_to_nv_texture_type(view.view_type);
425         th.set_field(clb097::TEXHEAD_BL_TEXTURE_TYPE, nv_text_type);
426     } else {
427         set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_PITCH);
428 
429         let addr = BitView::new(&layer_address);
430         assert!(addr.get_bit_range_u64(0..5) == 0);
431         th.set_field(
432             clb097::TEXHEAD_PITCH_ADDRESS_BITS31TO5,
433             addr.get_bit_range_u64(5..32),
434         );
435         th.set_field(
436             clb097::TEXHEAD_PITCH_ADDRESS_BITS47TO32,
437             addr.get_bit_range_u64(32..48),
438         );
439         assert!(addr.get_bit_range_u64(48..64) == 0);
440 
441         let pitch = image.levels[0].row_stride_B;
442         let pitch = BitView::new(&pitch);
443         assert!(pitch.get_bit_range_u64(0..5) == 0);
444         assert!(pitch.get_bit_range_u64(21..32) == 0);
445         th.set_field(
446             clb097::TEXHEAD_PITCH_PITCH_BITS20TO5,
447             pitch.get_bit_range_u64(5..21),
448         );
449 
450         assert!(
451             view.view_type == ViewType::_2D
452                 || view.view_type == ViewType::_2DArray
453         );
454         assert!(image.sample_layout == SampleLayout::_1x1);
455         assert!(view.num_levels == 1);
456         set_enum!(th, clb097, TEXHEAD_PITCH_TEXTURE_TYPE, TWO_D_NO_MIPMAP);
457     }
458 
459     th.set_field(clb097::TEXHEAD_BL_LOD_ANISO_QUALITY2, true);
460     set_enum!(th, clb097, TEXHEAD_BL_LOD_ANISO_QUALITY, LOD_QUALITY_HIGH);
461     set_enum!(th, clb097, TEXHEAD_BL_LOD_ISO_QUALITY, LOD_QUALITY_HIGH);
462     set_enum!(
463         th,
464         clb097,
465         TEXHEAD_BL_ANISO_COARSE_SPREAD_MODIFIER,
466         SPREAD_MODIFIER_NONE
467     );
468 
469     let extent = normalize_extent(image, view);
470     th.set_field(clb097::TEXHEAD_BL_WIDTH_MINUS_ONE, extent.width - 1);
471 
472     if dev.cls_eng3d >= PASCAL_A {
473         let height_1 = extent.height - 1;
474         let depth_1 = extent.depth - 1;
475         th.set_field(clc097::TEXHEAD_BL_HEIGHT_MINUS_ONE, height_1 & 0xffff);
476         th.set_field(clc097::TEXHEAD_BL_HEIGHT_MINUS_ONE_BIT16, height_1 >> 16);
477         th.set_field(clc097::TEXHEAD_BL_DEPTH_MINUS_ONE, depth_1 & 0x3fff);
478         th.set_field(clc097::TEXHEAD_BL_DEPTH_MINUS_ONE_BIT14, depth_1 >> 14);
479     } else {
480         th.set_field(clb097::TEXHEAD_BL_HEIGHT_MINUS_ONE, extent.height - 1);
481         th.set_field(clb097::TEXHEAD_BL_DEPTH_MINUS_ONE, extent.depth - 1);
482     }
483 
484     let max_mip_level = nil_rs_max_mip_level(image, view);
485     th.set_field(clb097::TEXHEAD_BL_MAX_MIP_LEVEL, max_mip_level);
486 
487     th.set_field(clb097::TEXHEAD_BL_S_R_G_B_CONVERSION, view.format.is_srgb());
488 
489     set_enum!(th, clb097, TEXHEAD_BL_SECTOR_PROMOTION, PROMOTE_TO_2_V);
490     set_enum!(th, clb097, TEXHEAD_BL_BORDER_SIZE, BORDER_SAMPLER_COLOR);
491 
492     // In the sampler, the two options for FLOAT_COORD_NORMALIZATION are:
493     //
494     // - FORCE_UNNORMALIZED_COORDS
495     // - USE_HEADER_SETTING
496     //
497     // So we set it to normalized in the header and let the sampler select that
498     // or force non-normalized.
499 
500     th.set_field(clb097::TEXHEAD_BL_NORMALIZED_COORDS, true);
501     set_enum!(
502         th,
503         clb097,
504         TEXHEAD_BL_ANISO_FINE_SPREAD_FUNC,
505         SPREAD_FUNC_TWO
506     );
507     set_enum!(
508         th,
509         clb097,
510         TEXHEAD_BL_ANISO_COARSE_SPREAD_FUNC,
511         SPREAD_FUNC_ONE
512     );
513 
514     th.set_field(clb097::TEXHEAD_BL_RES_VIEW_MIN_MIP_LEVEL, view.base_level);
515 
516     let max_mip_level = view.num_levels + view.base_level - 1;
517     th.set_field(clb097::TEXHEAD_BL_RES_VIEW_MAX_MIP_LEVEL, max_mip_level);
518 
519     let msc = nil_rs_to_nvb097_multi_sample_count(image.sample_layout);
520     th.set_field(clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT, msc);
521 
522     let min_lod_clamp = view.min_lod_clamp - (view.base_level as f32);
523     th.set_ufixed(clb097::TEXHEAD_BL_MIN_LOD_CLAMP, min_lod_clamp);
524 }
525 
526 pub const IDENTITY_SWIZZLE: [nil_rs_bindings::pipe_swizzle; 4] = [
527     nil_rs_bindings::PIPE_SWIZZLE_X,
528     nil_rs_bindings::PIPE_SWIZZLE_Y,
529     nil_rs_bindings::PIPE_SWIZZLE_Z,
530     nil_rs_bindings::PIPE_SWIZZLE_W,
531 ];
532 
nv9097_nil_fill_buffer_tic( base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )533 fn nv9097_nil_fill_buffer_tic(
534     base_address: u64,
535     format: Format,
536     num_elements: u32,
537     desc_out: &mut [u32; 8],
538 ) {
539     *desc_out = [0u32; 8];
540     let mut th = BitMutView::new(desc_out);
541     th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
542 
543     assert!(format.supports_buffer());
544     nv9097_set_th_v2_0(&mut th, &format, IDENTITY_SWIZZLE);
545 
546     th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, base_address as u32);
547     th.set_field(cl9097::TEXHEADV2_OFFSET_UPPER, (base_address >> 32) as u32);
548 
549     set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, PITCH);
550 
551     th.set_field(cl9097::TEXHEADV2_WIDTH, num_elements);
552 
553     set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, ONE_D_BUFFER);
554 }
555 
nvb097_nil_fill_buffer_tic( base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )556 fn nvb097_nil_fill_buffer_tic(
557     base_address: u64,
558     format: Format,
559     num_elements: u32,
560     desc_out: &mut [u32; 8],
561 ) {
562     *desc_out = [0u32; 8];
563     let mut th = BitMutView::new(desc_out);
564 
565     assert!(format.supports_buffer());
566     nvb097_set_th_bl_0(&mut th, &format, IDENTITY_SWIZZLE);
567 
568     th.set_field(clb097::TEXHEAD_1D_ADDRESS_BITS31TO0, base_address as u32);
569     th.set_field(clb097::TEXHEAD_1D_ADDRESS_BITS47TO32, base_address >> 32);
570 
571     set_enum!(th, clb097, TEXHEAD_1D_HEADER_VERSION, SELECT_ONE_D_BUFFER);
572 
573     th.set_field(
574         clb097::TEXHEAD_1D_WIDTH_MINUS_ONE_BITS15TO0,
575         (num_elements - 1) & 0xffff,
576     );
577     th.set_field(
578         clb097::TEXHEAD_1D_WIDTH_MINUS_ONE_BITS31TO16,
579         (num_elements - 1) >> 16,
580     );
581 
582     set_enum!(th, clb097, TEXHEAD_1D_TEXTURE_TYPE, ONE_D_BUFFER);
583 
584     // TODO: Do we need this?
585     set_enum!(th, clb097, TEXHEAD_1D_SECTOR_PROMOTION, PROMOTE_TO_2_V);
586 }
587 
588 impl Image {
589     #[no_mangle]
nil_image_fill_tic( &self, dev: &nil_rs_bindings::nv_device_info, view: &View, base_address: u64, desc_out: &mut [u32; 8], )590     pub extern "C" fn nil_image_fill_tic(
591         &self,
592         dev: &nil_rs_bindings::nv_device_info,
593         view: &View,
594         base_address: u64,
595         desc_out: &mut [u32; 8],
596     ) {
597         self.fill_tic(dev, view, base_address, desc_out);
598     }
599 
fill_tic( &self, dev: &nil_rs_bindings::nv_device_info, view: &View, base_address: u64, desc_out: &mut [u32; 8], )600     pub fn fill_tic(
601         &self,
602         dev: &nil_rs_bindings::nv_device_info,
603         view: &View,
604         base_address: u64,
605         desc_out: &mut [u32; 8],
606     ) {
607         if dev.cls_eng3d >= MAXWELL_A {
608             nvb097_fill_tic(dev, self, view, base_address, desc_out);
609         } else if dev.cls_eng3d >= FERMI_A {
610             nv9097_fill_tic(self, view, base_address, desc_out);
611         } else {
612             panic!("Tesla and older not supported");
613         }
614     }
615 }
616 
/// C-ABI entry point that forwards all arguments unchanged to
/// [`fill_buffer_tic`].
#[no_mangle]
pub extern "C" fn nil_buffer_fill_tic(
    dev: &nil_rs_bindings::nv_device_info,
    base_address: u64,
    format: Format,
    num_elements: u32,
    desc_out: &mut [u32; 8],
) {
    fill_buffer_tic(dev, base_address, format, num_elements, desc_out);
}
627 
fill_buffer_tic( dev: &nil_rs_bindings::nv_device_info, base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )628 pub fn fill_buffer_tic(
629     dev: &nil_rs_bindings::nv_device_info,
630     base_address: u64,
631     format: Format,
632     num_elements: u32,
633     desc_out: &mut [u32; 8],
634 ) {
635     if dev.cls_eng3d >= MAXWELL_A {
636         nvb097_nil_fill_buffer_tic(base_address, format, num_elements, desc_out)
637     } else if dev.cls_eng3d >= FERMI_A {
638         nv9097_nil_fill_buffer_tic(base_address, format, num_elements, desc_out)
639     } else {
640         panic!("Tesla and older not supported");
641     }
642 }
643 
644 pub const ZERO_SWIZZLE: [nil_rs_bindings::pipe_swizzle; 4] = [
645     nil_rs_bindings::PIPE_SWIZZLE_0,
646     nil_rs_bindings::PIPE_SWIZZLE_0,
647     nil_rs_bindings::PIPE_SWIZZLE_0,
648     nil_rs_bindings::PIPE_SWIZZLE_0,
649 ];
650 
nv9097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8])651 fn nv9097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8]) {
652     *desc_out = [0u32; 8];
653     let mut th = BitMutView::new(desc_out);
654 
655     th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
656     let format = Format::try_from(PIPE_FORMAT_R8_UNORM).unwrap();
657     nvb097_set_th_bl_0(&mut th, &format, ZERO_SWIZZLE);
658 
659     th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, zero_page_address as u32);
660     th.set_field(
661         cl9097::TEXHEADV2_OFFSET_UPPER,
662         (zero_page_address >> 32) as u32,
663     );
664 
665     set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, BLOCKLINEAR);
666     set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, TWO_D_ARRAY);
667     th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
668 
669     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, 1_u8);
670     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL, 0_u8);
671 }
672 
nvb097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8])673 fn nvb097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8]) {
674     *desc_out = [0u32; 8];
675     let mut th = BitMutView::new(desc_out);
676 
677     let format = Format::try_from(PIPE_FORMAT_R8_UNORM).unwrap();
678     nvb097_set_th_bl_0(&mut th, &format, ZERO_SWIZZLE);
679 
680     set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_BLOCKLINEAR);
681 
682     let addr = BitView::new(&zero_page_address);
683     assert!(addr.get_bit_range_u64(0..9) == 0);
684     th.set_field(
685         clb097::TEXHEAD_BL_ADDRESS_BITS31TO9,
686         addr.get_bit_range_u64(9..32),
687     );
688     th.set_field(
689         clb097::TEXHEAD_BL_ADDRESS_BITS47TO32,
690         addr.get_bit_range_u64(32..48),
691     );
692     assert!(addr.get_bit_range_u64(48..64) == 0);
693 
694     set_enum!(th, clb097, TEXHEAD_BL_TEXTURE_TYPE, TWO_D_ARRAY);
695     set_enum!(th, clb097, TEXHEAD_BL_BORDER_SIZE, BORDER_SAMPLER_COLOR);
696     th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
697 
698     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, 1_u8);
699     th.set_field(cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL, 0_u8);
700 
701     // This is copied from the D3D12 driver. I have no idea what these bits do
702     // or if they even do anything.
703     th.set_field(clb097::TEXHEAD_BL_RESERVED4A, 0x4_u8);
704     th.set_field(clb097::TEXHEAD_BL_RESERVED7Y, 0x80_u8);
705 }
706 
fill_null_tic( dev: &nil_rs_bindings::nv_device_info, zero_page_address: u64, desc_out: &mut [u32; 8], )707 pub fn fill_null_tic(
708     dev: &nil_rs_bindings::nv_device_info,
709     zero_page_address: u64,
710     desc_out: &mut [u32; 8],
711 ) {
712     if dev.cls_eng3d >= VOLTA_A {
713         // On Volta+, we can just fill with zeros
714         *desc_out = [0; 8]
715     } else if dev.cls_eng3d >= MAXWELL_A {
716         nvb097_fill_null_tic(zero_page_address, desc_out)
717     } else if dev.cls_eng3d >= FERMI_A {
718         nv9097_fill_null_tic(zero_page_address, desc_out)
719     } else {
720         panic!("Tesla and older not supported");
721     }
722 }
723 
/// C-ABI entry point that forwards all arguments unchanged to
/// [`fill_null_tic`].
#[no_mangle]
pub extern "C" fn nil_fill_null_tic(
    dev: &nil_rs_bindings::nv_device_info,
    zero_page_address: u64,
    desc_out: &mut [u32; 8],
) {
    fill_null_tic(dev, zero_page_address, desc_out);
}
732