1 // Copyright © 2024 Collabora, Ltd.
2 // SPDX-License-Identifier: MIT
3
4 #![allow(unused_macros)]
5
6 extern crate bitview;
7 extern crate nvidia_headers;
8
9 use bitview::*;
10 use nil_rs_bindings::*;
11 use nvidia_headers::classes::cl9097::tex as cl9097;
12 use nvidia_headers::classes::cl9097::FERMI_A;
13 use nvidia_headers::classes::clb097::tex as clb097;
14 use nvidia_headers::classes::clb097::MAXWELL_A;
15 use nvidia_headers::classes::clc097::tex as clc097;
16 use nvidia_headers::classes::clc097::PASCAL_A;
17 use nvidia_headers::classes::clc397::VOLTA_A;
18 use paste::paste;
19 use std::ops::Range;
20
21 use crate::extent::{units, Extent4D};
22 use crate::format::Format;
23 use crate::image::Image;
24 use crate::image::ImageDim;
25 use crate::image::SampleLayout;
26 use crate::image::View;
27 use crate::image::ViewType;
28 use crate::tiling::GOBType;
29
/// Sets an enumerated header field by name.
///
/// `set_enum!(th, cls, FIELD, VALUE)` expands to
/// `th.set_field(cls::FIELD, cls::FIELD_VALUE)`; `paste!` glues the field
/// name, an underscore and the enumerant name into one identifier.
macro_rules! set_enum {
    ($view:expr, $class:ident, $field:ident, $value:ident) => {
        paste! {
            $view.set_field($class::$field, $class::[<$field _ $value>])
        }
    };
}
37
/// Extension trait for storing a float as an unsigned fixed-point bit field.
trait SetUFixed {
    /// Encodes `val` as unsigned fixed point and writes it to the bits
    /// selected by `range`.
    fn set_ufixed(&mut self, range: Range<usize>, val: f32);
}
41
42 const FIXED_FRAC_BITS: u32 = 8;
43
44 impl<T: SetFieldU64> SetUFixed for T {
set_ufixed(&mut self, range: Range<usize>, val: f32)45 fn set_ufixed(&mut self, range: Range<usize>, val: f32) {
46 assert!(range.len() >= FIXED_FRAC_BITS as usize);
47 let scaled = val * ((1 << FIXED_FRAC_BITS) as f32);
48 let scaled_max = ((1 << range.len()) - 1) as f32;
49 let u_val = scaled.clamp(0.0, scaled_max).round() as u32;
50 self.set_field(range, u_val);
51 }
52 }
53
nv9097_th_v2_source( fmt: &nil_tic_format, swizzle: pipe_swizzle, is_int: bool, ) -> u3254 fn nv9097_th_v2_source(
55 fmt: &nil_tic_format,
56 swizzle: pipe_swizzle,
57 is_int: bool,
58 ) -> u32 {
59 match swizzle {
60 PIPE_SWIZZLE_X => fmt.src_x(),
61 PIPE_SWIZZLE_Y => fmt.src_y(),
62 PIPE_SWIZZLE_Z => fmt.src_z(),
63 PIPE_SWIZZLE_W => fmt.src_w(),
64 PIPE_SWIZZLE_0 => cl9097::TEXHEADV2_X_SOURCE_IN_ZERO,
65 PIPE_SWIZZLE_1 => {
66 if is_int {
67 cl9097::TEXHEADV2_X_SOURCE_IN_ONE_INT
68 } else {
69 cl9097::TEXHEADV2_X_SOURCE_IN_ONE_FLOAT
70 }
71 }
72 other => panic!("Invalid component swizzle {}", other),
73 }
74 }
75
nvb097_th_bl_source( fmt: &nil_tic_format, swizzle: pipe_swizzle, is_int: bool, ) -> u3276 fn nvb097_th_bl_source(
77 fmt: &nil_tic_format,
78 swizzle: pipe_swizzle,
79 is_int: bool,
80 ) -> u32 {
81 match swizzle {
82 PIPE_SWIZZLE_X => fmt.src_x(),
83 PIPE_SWIZZLE_Y => fmt.src_y(),
84 PIPE_SWIZZLE_Z => fmt.src_z(),
85 PIPE_SWIZZLE_W => fmt.src_w(),
86 PIPE_SWIZZLE_0 => clb097::TEXHEADV2_X_SOURCE_IN_ZERO,
87 PIPE_SWIZZLE_1 => {
88 if is_int {
89 clb097::TEXHEADV2_X_SOURCE_IN_ONE_INT
90 } else {
91 clb097::TEXHEADV2_X_SOURCE_IN_ONE_FLOAT
92 }
93 }
94 other => panic!("Invalid component swizzle {}", other),
95 }
96 }
97
98 type THBitView<'a> = BitMutView<'a, [u32; 8]>;
99
nv9097_set_th_v2_0<'a>( th: &mut THBitView<'a>, format: &Format, swizzle: [nil_rs_bindings::pipe_swizzle; 4], )100 fn nv9097_set_th_v2_0<'a>(
101 th: &mut THBitView<'a>,
102 format: &Format,
103 swizzle: [nil_rs_bindings::pipe_swizzle; 4],
104 ) {
105 let fmt = &format.info().tic;
106 let is_int = format.is_integer();
107 let source = [
108 nv9097_th_v2_source(fmt, swizzle[0], is_int),
109 nv9097_th_v2_source(fmt, swizzle[1], is_int),
110 nv9097_th_v2_source(fmt, swizzle[2], is_int),
111 nv9097_th_v2_source(fmt, swizzle[3], is_int),
112 ];
113
114 th.set_field(cl9097::TEXHEADV2_COMPONENT_SIZES, fmt.comp_sizes());
115 th.set_field(cl9097::TEXHEADV2_R_DATA_TYPE, fmt.type_r());
116 th.set_field(cl9097::TEXHEADV2_G_DATA_TYPE, fmt.type_g());
117 th.set_field(cl9097::TEXHEADV2_B_DATA_TYPE, fmt.type_b());
118 th.set_field(cl9097::TEXHEADV2_A_DATA_TYPE, fmt.type_a());
119 th.set_field(cl9097::TEXHEADV2_X_SOURCE, source[0]);
120 th.set_field(cl9097::TEXHEADV2_Y_SOURCE, source[1]);
121 th.set_field(cl9097::TEXHEADV2_Z_SOURCE, source[2]);
122 th.set_field(cl9097::TEXHEADV2_W_SOURCE, source[3]);
123 }
124
nvb097_set_th_bl_0<'a>( th: &mut THBitView<'a>, format: &Format, swizzle: [nil_rs_bindings::pipe_swizzle; 4], )125 fn nvb097_set_th_bl_0<'a>(
126 th: &mut THBitView<'a>,
127 format: &Format,
128 swizzle: [nil_rs_bindings::pipe_swizzle; 4],
129 ) {
130 let fmt = &format.info().tic;
131 let is_int = format.is_integer();
132 let source = [
133 nvb097_th_bl_source(fmt, swizzle[0], is_int),
134 nvb097_th_bl_source(fmt, swizzle[1], is_int),
135 nvb097_th_bl_source(fmt, swizzle[2], is_int),
136 nvb097_th_bl_source(fmt, swizzle[3], is_int),
137 ];
138
139 th.set_field(clb097::TEXHEAD_BL_COMPONENTS, fmt.comp_sizes());
140 th.set_field(clb097::TEXHEAD_BL_R_DATA_TYPE, fmt.type_r());
141 th.set_field(clb097::TEXHEAD_BL_G_DATA_TYPE, fmt.type_g());
142 th.set_field(clb097::TEXHEAD_BL_B_DATA_TYPE, fmt.type_b());
143 th.set_field(clb097::TEXHEAD_BL_A_DATA_TYPE, fmt.type_a());
144 th.set_field(clb097::TEXHEAD_BL_X_SOURCE, source[0]);
145 th.set_field(clb097::TEXHEAD_BL_Y_SOURCE, source[1]);
146 th.set_field(clb097::TEXHEAD_BL_Z_SOURCE, source[2]);
147 th.set_field(clb097::TEXHEAD_BL_W_SOURCE, source[3]);
148 }
149
pipe_to_nv_texture_type(ty: ViewType) -> u32150 fn pipe_to_nv_texture_type(ty: ViewType) -> u32 {
151 match ty {
152 ViewType::_1D => clb097::TEXHEAD_BL_TEXTURE_TYPE_ONE_D,
153 ViewType::_2D => clb097::TEXHEAD_BL_TEXTURE_TYPE_TWO_D,
154 ViewType::_3D | ViewType::_3DSliced => {
155 clb097::TEXHEAD_BL_TEXTURE_TYPE_THREE_D
156 }
157 ViewType::Cube => clb097::TEXHEAD_BL_TEXTURE_TYPE_CUBEMAP,
158 ViewType::_1DArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_ONE_D_ARRAY,
159 ViewType::_2DArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_TWO_D_ARRAY,
160 ViewType::CubeArray => clb097::TEXHEAD_BL_TEXTURE_TYPE_CUBEMAP_ARRAY,
161 }
162 }
163
nil_rs_to_nv9097_multi_sample_count(sample_layout: SampleLayout) -> u32164 fn nil_rs_to_nv9097_multi_sample_count(sample_layout: SampleLayout) -> u32 {
165 match sample_layout {
166 SampleLayout::_1x1 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_1X1,
167 SampleLayout::_2x1 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_2X1,
168 SampleLayout::_2x1D3d => {
169 cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_2X1_D3D
170 }
171 SampleLayout::_2x2 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_2X2,
172 SampleLayout::_4x2 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_4X2,
173 SampleLayout::_4x2D3d => {
174 cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_4X2_D3D
175 }
176 SampleLayout::_4x4 => cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT_MODE_4X4,
177 SampleLayout::Invalid => panic!("Invalid sample layout"),
178 }
179 }
180
nil_rs_to_nvb097_multi_sample_count(sample_layout: SampleLayout) -> u32181 fn nil_rs_to_nvb097_multi_sample_count(sample_layout: SampleLayout) -> u32 {
182 match sample_layout {
183 SampleLayout::_1x1 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_1X1,
184 SampleLayout::_2x1 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_2X1,
185 SampleLayout::_2x1D3d => {
186 clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_2X1_D3D
187 }
188 SampleLayout::_2x2 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_2X2,
189 SampleLayout::_4x2 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_4X2,
190 SampleLayout::_4x2D3d => {
191 clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_4X2_D3D
192 }
193 SampleLayout::_4x4 => clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT_MODE_4X4,
194 SampleLayout::Invalid => panic!("Invalid sample layout"),
195 }
196 }
197
nil_rs_max_mip_level(image: &Image, view: &View) -> u32198 fn nil_rs_max_mip_level(image: &Image, view: &View) -> u32 {
199 if view.view_type != ViewType::_3D
200 && view.array_len == 1
201 && view.base_level == 0
202 && view.num_levels == 1
203 {
204 // The Unnormalized coordinates bit in the sampler gets ignored if the
205 // referenced image has more than one miplevel. Fortunately, Vulkan has
206 // restrictions requiring the view to be a single-layer single-LOD view
207 // in order to use nonnormalizedCoordinates = VK_TRUE in the sampler.
208 // From the Vulkan 1.3.255 spec:
209 //
210 // "When unnormalizedCoordinates is VK_TRUE, images the sampler is
211 // used with in the shader have the following requirements:
212 //
213 // - The viewType must be either VK_IMAGE_VIEW_TYPE_1D or
214 // VK_IMAGE_VIEW_TYPE_2D.
215 // - The image view must have a single layer and a single mip
216 // level."
217 //
218 // Under these conditions, the view is simply LOD 0 of a single array
219 // slice so we don't need to care about aray stride between slices so
220 // it's safe to set the number of miplevels to 0 regardless of how many
221 // the image actually has.
222 0
223 } else {
224 image.num_levels - 1
225 }
226 }
227
normalize_extent(image: &Image, view: &View) -> Extent4D<units::Pixels>228 fn normalize_extent(image: &Image, view: &View) -> Extent4D<units::Pixels> {
229 let mut extent = image.extent_px;
230 match view.view_type {
231 ViewType::_1D
232 | ViewType::_2D
233 | ViewType::_1DArray
234 | ViewType::_2DArray => {
235 assert!(image.extent_px.depth == 1);
236 extent.depth = view.array_len;
237 }
238 ViewType::_3D => {
239 assert!(image.dim == ImageDim::_3D);
240 extent.depth = image.extent_px.depth;
241 }
242 ViewType::Cube | ViewType::CubeArray => {
243 assert!(image.dim == ImageDim::_2D);
244 assert!(view.array_len % 6 == 0);
245 extent.depth = view.array_len / 6;
246 }
247 ViewType::_3DSliced => {
248 assert!(image.dim == ImageDim::_3D);
249 extent.depth = view.array_len;
250 }
251 }
252 extent.array_len = 0;
253
254 extent
255 }
256
nv9097_fill_tic( image: &Image, view: &View, base_address: u64, desc_out: &mut [u32; 8], )257 fn nv9097_fill_tic(
258 image: &Image,
259 view: &View,
260 base_address: u64,
261 desc_out: &mut [u32; 8],
262 ) {
263 assert!(image.format.el_size_B() == view.format.el_size_B());
264 assert!(view.base_level + view.num_levels <= image.num_levels);
265 assert!(
266 view.base_array_layer + view.array_len <= image.extent_px.array_len
267 );
268
269 *desc_out = [0u32; 8];
270 let mut th = BitMutView::new(desc_out);
271
272 th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
273 nv9097_set_th_v2_0(&mut th, &view.format, view.swizzle);
274
275 // There's no base layer field in the texture header
276 let layer_address = base_address
277 + u64::from(view.base_array_layer)
278 + u64::from(image.array_stride_B);
279
280 th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, layer_address as u32);
281 th.set_field(cl9097::TEXHEADV2_OFFSET_UPPER, (layer_address >> 32) as u32);
282
283 let tiling = &image.levels[0].tiling;
284
285 if tiling.is_tiled() {
286 set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, BLOCKLINEAR);
287
288 assert!(tiling.gob_type == GOBType::Fermi8);
289 assert!(tiling.x_log2 == 0);
290 set_enum!(th, cl9097, TEXHEADV2_GOBS_PER_BLOCK_WIDTH, ONE_GOB);
291 th.set_field(cl9097::TEXHEADV2_GOBS_PER_BLOCK_HEIGHT, tiling.y_log2);
292 th.set_field(cl9097::TEXHEADV2_GOBS_PER_BLOCK_DEPTH, tiling.z_log2);
293
294 let nv_tex_type = pipe_to_nv_texture_type(view.view_type);
295 th.set_field(cl9097::TEXHEADV2_TEXTURE_TYPE, nv_tex_type);
296 } else {
297 set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, PITCH);
298
299 let pitch = image.levels[0].row_stride_B;
300 th.set_field(cl9097::TEXHEADV2_PITCH, pitch);
301
302 assert!(
303 view.view_type == ViewType::_2D
304 || view.view_type == ViewType::_2DArray
305 );
306 assert!(image.sample_layout == SampleLayout::_1x1);
307 assert!(view.num_levels == 1);
308 set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, TWO_D_NO_MIPMAP);
309 }
310
311 set_enum!(th, cl9097, TEXHEADV2_LOD_ANISO_QUALITY, LOD_QUALITY_HIGH);
312 set_enum!(th, cl9097, TEXHEADV2_LOD_ISO_QUALITY, LOD_QUALITY_HIGH);
313 set_enum!(
314 th,
315 cl9097,
316 TEXHEADV2_ANISO_COARSE_SPREAD_MODIFIER,
317 SPREAD_MODIFIER_NONE
318 );
319
320 let extent = normalize_extent(image, view);
321 th.set_field(cl9097::TEXHEADV2_WIDTH, extent.width);
322 th.set_field(cl9097::TEXHEADV2_HEIGHT, extent.height);
323 th.set_field(cl9097::TEXHEADV2_DEPTH, extent.depth);
324
325 let max_mip_level = nil_rs_max_mip_level(image, view);
326 th.set_field(cl9097::TEXHEADV2_MAX_MIP_LEVEL, max_mip_level);
327
328 th.set_field(cl9097::TEXHEADV2_S_R_G_B_CONVERSION, view.format.is_srgb());
329
330 set_enum!(th, cl9097, TEXHEADV2_BORDER_SOURCE, BORDER_COLOR);
331
332 // In the sampler, the two options for FLOAT_COORD_NORMALIZATION are:
333 //
334 // - FORCE_UNNORMALIZED_COORDS
335 // - USE_HEADER_SETTING
336 //
337 // So we set it to normalized in the header and let the sampler select that
338 // or force non-normalized.
339 th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
340
341 set_enum!(
342 th,
343 cl9097,
344 TEXHEADV2_ANISO_FINE_SPREAD_FUNC,
345 SPREAD_FUNC_TWO
346 );
347 set_enum!(
348 th,
349 cl9097,
350 TEXHEADV2_ANISO_COARSE_SPREAD_FUNC,
351 SPREAD_FUNC_ONE
352 );
353
354 th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, view.base_level);
355 th.set_field(
356 cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL,
357 view.num_levels + view.base_level - 1,
358 );
359
360 let msc = nil_rs_to_nv9097_multi_sample_count(image.sample_layout);
361 th.set_field(cl9097::TEXHEADV2_MULTI_SAMPLE_COUNT, msc);
362
363 let min_lod_clamp = view.min_lod_clamp - (view.base_level as f32);
364 th.set_ufixed(cl9097::TEXHEADV2_MIN_LOD_CLAMP, min_lod_clamp);
365 }
366
nvb097_fill_tic( dev: &nil_rs_bindings::nv_device_info, image: &Image, view: &View, base_address: u64, desc_out: &mut [u32; 8], )367 fn nvb097_fill_tic(
368 dev: &nil_rs_bindings::nv_device_info,
369 image: &Image,
370 view: &View,
371 base_address: u64,
372 desc_out: &mut [u32; 8],
373 ) {
374 assert!(image.format.el_size_B() == view.format.el_size_B());
375 assert!(view.base_level + view.num_levels <= image.num_levels);
376
377 *desc_out = [0u32; 8];
378 let mut th = BitMutView::new(desc_out);
379
380 nvb097_set_th_bl_0(&mut th, &view.format, view.swizzle);
381
382 let tiling = &image.levels[0].tiling;
383
384 // There's no base layer field in the texture header
385 let mut layer_address = base_address;
386 if view.view_type == ViewType::_3DSliced {
387 assert!(view.num_levels == 1);
388 assert!(
389 view.base_array_layer + view.array_len <= image.extent_px.depth
390 );
391
392 layer_address +=
393 image.level_z_offset_B(view.base_level, view.base_array_layer);
394 } else {
395 assert!(
396 view.base_array_layer + view.array_len <= image.extent_px.array_len
397 );
398 layer_address +=
399 u64::from(view.base_array_layer) * u64::from(image.array_stride_B);
400 }
401
402 if tiling.is_tiled() {
403 set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_BLOCKLINEAR);
404
405 let addr = BitView::new(&layer_address);
406 assert!(addr.get_bit_range_u64(0..9) == 0);
407 th.set_field(
408 clb097::TEXHEAD_BL_ADDRESS_BITS31TO9,
409 addr.get_bit_range_u64(9..32),
410 );
411 th.set_field(
412 clb097::TEXHEAD_BL_ADDRESS_BITS47TO32,
413 addr.get_bit_range_u64(32..48),
414 );
415 assert!(addr.get_bit_range_u64(48..64) == 0);
416
417 assert!(tiling.gob_type == GOBType::Fermi8);
418
419 set_enum!(th, clb097, TEXHEAD_BL_GOBS_PER_BLOCK_WIDTH, ONE_GOB);
420 th.set_field(clb097::TEXHEAD_BL_GOBS_PER_BLOCK_HEIGHT, tiling.y_log2);
421 th.set_field(clb097::TEXHEAD_BL_GOBS_PER_BLOCK_DEPTH, tiling.z_log2);
422 th.set_field(clb097::TEXHEAD_BL_TILE_WIDTH_IN_GOBS, tiling.x_log2);
423
424 let nv_text_type = pipe_to_nv_texture_type(view.view_type);
425 th.set_field(clb097::TEXHEAD_BL_TEXTURE_TYPE, nv_text_type);
426 } else {
427 set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_PITCH);
428
429 let addr = BitView::new(&layer_address);
430 assert!(addr.get_bit_range_u64(0..5) == 0);
431 th.set_field(
432 clb097::TEXHEAD_PITCH_ADDRESS_BITS31TO5,
433 addr.get_bit_range_u64(5..32),
434 );
435 th.set_field(
436 clb097::TEXHEAD_PITCH_ADDRESS_BITS47TO32,
437 addr.get_bit_range_u64(32..48),
438 );
439 assert!(addr.get_bit_range_u64(48..64) == 0);
440
441 let pitch = image.levels[0].row_stride_B;
442 let pitch = BitView::new(&pitch);
443 assert!(pitch.get_bit_range_u64(0..5) == 0);
444 assert!(pitch.get_bit_range_u64(21..32) == 0);
445 th.set_field(
446 clb097::TEXHEAD_PITCH_PITCH_BITS20TO5,
447 pitch.get_bit_range_u64(5..21),
448 );
449
450 assert!(
451 view.view_type == ViewType::_2D
452 || view.view_type == ViewType::_2DArray
453 );
454 assert!(image.sample_layout == SampleLayout::_1x1);
455 assert!(view.num_levels == 1);
456 set_enum!(th, clb097, TEXHEAD_PITCH_TEXTURE_TYPE, TWO_D_NO_MIPMAP);
457 }
458
459 th.set_field(clb097::TEXHEAD_BL_LOD_ANISO_QUALITY2, true);
460 set_enum!(th, clb097, TEXHEAD_BL_LOD_ANISO_QUALITY, LOD_QUALITY_HIGH);
461 set_enum!(th, clb097, TEXHEAD_BL_LOD_ISO_QUALITY, LOD_QUALITY_HIGH);
462 set_enum!(
463 th,
464 clb097,
465 TEXHEAD_BL_ANISO_COARSE_SPREAD_MODIFIER,
466 SPREAD_MODIFIER_NONE
467 );
468
469 let extent = normalize_extent(image, view);
470 th.set_field(clb097::TEXHEAD_BL_WIDTH_MINUS_ONE, extent.width - 1);
471
472 if dev.cls_eng3d >= PASCAL_A {
473 let height_1 = extent.height - 1;
474 let depth_1 = extent.depth - 1;
475 th.set_field(clc097::TEXHEAD_BL_HEIGHT_MINUS_ONE, height_1 & 0xffff);
476 th.set_field(clc097::TEXHEAD_BL_HEIGHT_MINUS_ONE_BIT16, height_1 >> 16);
477 th.set_field(clc097::TEXHEAD_BL_DEPTH_MINUS_ONE, depth_1 & 0x3fff);
478 th.set_field(clc097::TEXHEAD_BL_DEPTH_MINUS_ONE_BIT14, depth_1 >> 14);
479 } else {
480 th.set_field(clb097::TEXHEAD_BL_HEIGHT_MINUS_ONE, extent.height - 1);
481 th.set_field(clb097::TEXHEAD_BL_DEPTH_MINUS_ONE, extent.depth - 1);
482 }
483
484 let max_mip_level = nil_rs_max_mip_level(image, view);
485 th.set_field(clb097::TEXHEAD_BL_MAX_MIP_LEVEL, max_mip_level);
486
487 th.set_field(clb097::TEXHEAD_BL_S_R_G_B_CONVERSION, view.format.is_srgb());
488
489 set_enum!(th, clb097, TEXHEAD_BL_SECTOR_PROMOTION, PROMOTE_TO_2_V);
490 set_enum!(th, clb097, TEXHEAD_BL_BORDER_SIZE, BORDER_SAMPLER_COLOR);
491
492 // In the sampler, the two options for FLOAT_COORD_NORMALIZATION are:
493 //
494 // - FORCE_UNNORMALIZED_COORDS
495 // - USE_HEADER_SETTING
496 //
497 // So we set it to normalized in the header and let the sampler select that
498 // or force non-normalized.
499
500 th.set_field(clb097::TEXHEAD_BL_NORMALIZED_COORDS, true);
501 set_enum!(
502 th,
503 clb097,
504 TEXHEAD_BL_ANISO_FINE_SPREAD_FUNC,
505 SPREAD_FUNC_TWO
506 );
507 set_enum!(
508 th,
509 clb097,
510 TEXHEAD_BL_ANISO_COARSE_SPREAD_FUNC,
511 SPREAD_FUNC_ONE
512 );
513
514 th.set_field(clb097::TEXHEAD_BL_RES_VIEW_MIN_MIP_LEVEL, view.base_level);
515
516 let max_mip_level = view.num_levels + view.base_level - 1;
517 th.set_field(clb097::TEXHEAD_BL_RES_VIEW_MAX_MIP_LEVEL, max_mip_level);
518
519 let msc = nil_rs_to_nvb097_multi_sample_count(image.sample_layout);
520 th.set_field(clb097::TEXHEAD_BL_MULTI_SAMPLE_COUNT, msc);
521
522 let min_lod_clamp = view.min_lod_clamp - (view.base_level as f32);
523 th.set_ufixed(clb097::TEXHEAD_BL_MIN_LOD_CLAMP, min_lod_clamp);
524 }
525
526 pub const IDENTITY_SWIZZLE: [nil_rs_bindings::pipe_swizzle; 4] = [
527 nil_rs_bindings::PIPE_SWIZZLE_X,
528 nil_rs_bindings::PIPE_SWIZZLE_Y,
529 nil_rs_bindings::PIPE_SWIZZLE_Z,
530 nil_rs_bindings::PIPE_SWIZZLE_W,
531 ];
532
nv9097_nil_fill_buffer_tic( base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )533 fn nv9097_nil_fill_buffer_tic(
534 base_address: u64,
535 format: Format,
536 num_elements: u32,
537 desc_out: &mut [u32; 8],
538 ) {
539 *desc_out = [0u32; 8];
540 let mut th = BitMutView::new(desc_out);
541 th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
542
543 assert!(format.supports_buffer());
544 nv9097_set_th_v2_0(&mut th, &format, IDENTITY_SWIZZLE);
545
546 th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, base_address as u32);
547 th.set_field(cl9097::TEXHEADV2_OFFSET_UPPER, (base_address >> 32) as u32);
548
549 set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, PITCH);
550
551 th.set_field(cl9097::TEXHEADV2_WIDTH, num_elements);
552
553 set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, ONE_D_BUFFER);
554 }
555
nvb097_nil_fill_buffer_tic( base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )556 fn nvb097_nil_fill_buffer_tic(
557 base_address: u64,
558 format: Format,
559 num_elements: u32,
560 desc_out: &mut [u32; 8],
561 ) {
562 *desc_out = [0u32; 8];
563 let mut th = BitMutView::new(desc_out);
564
565 assert!(format.supports_buffer());
566 nvb097_set_th_bl_0(&mut th, &format, IDENTITY_SWIZZLE);
567
568 th.set_field(clb097::TEXHEAD_1D_ADDRESS_BITS31TO0, base_address as u32);
569 th.set_field(clb097::TEXHEAD_1D_ADDRESS_BITS47TO32, base_address >> 32);
570
571 set_enum!(th, clb097, TEXHEAD_1D_HEADER_VERSION, SELECT_ONE_D_BUFFER);
572
573 th.set_field(
574 clb097::TEXHEAD_1D_WIDTH_MINUS_ONE_BITS15TO0,
575 (num_elements - 1) & 0xffff,
576 );
577 th.set_field(
578 clb097::TEXHEAD_1D_WIDTH_MINUS_ONE_BITS31TO16,
579 (num_elements - 1) >> 16,
580 );
581
582 set_enum!(th, clb097, TEXHEAD_1D_TEXTURE_TYPE, ONE_D_BUFFER);
583
584 // TODO: Do we need this?
585 set_enum!(th, clb097, TEXHEAD_1D_SECTOR_PROMOTION, PROMOTE_TO_2_V);
586 }
587
588 impl Image {
589 #[no_mangle]
nil_image_fill_tic( &self, dev: &nil_rs_bindings::nv_device_info, view: &View, base_address: u64, desc_out: &mut [u32; 8], )590 pub extern "C" fn nil_image_fill_tic(
591 &self,
592 dev: &nil_rs_bindings::nv_device_info,
593 view: &View,
594 base_address: u64,
595 desc_out: &mut [u32; 8],
596 ) {
597 self.fill_tic(dev, view, base_address, desc_out);
598 }
599
fill_tic( &self, dev: &nil_rs_bindings::nv_device_info, view: &View, base_address: u64, desc_out: &mut [u32; 8], )600 pub fn fill_tic(
601 &self,
602 dev: &nil_rs_bindings::nv_device_info,
603 view: &View,
604 base_address: u64,
605 desc_out: &mut [u32; 8],
606 ) {
607 if dev.cls_eng3d >= MAXWELL_A {
608 nvb097_fill_tic(dev, self, view, base_address, desc_out);
609 } else if dev.cls_eng3d >= FERMI_A {
610 nv9097_fill_tic(self, view, base_address, desc_out);
611 } else {
612 panic!("Tesla and older not supported");
613 }
614 }
615 }
616
617 #[no_mangle]
nil_buffer_fill_tic( dev: &nil_rs_bindings::nv_device_info, base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )618 pub extern "C" fn nil_buffer_fill_tic(
619 dev: &nil_rs_bindings::nv_device_info,
620 base_address: u64,
621 format: Format,
622 num_elements: u32,
623 desc_out: &mut [u32; 8],
624 ) {
625 fill_buffer_tic(dev, base_address, format, num_elements, desc_out);
626 }
627
fill_buffer_tic( dev: &nil_rs_bindings::nv_device_info, base_address: u64, format: Format, num_elements: u32, desc_out: &mut [u32; 8], )628 pub fn fill_buffer_tic(
629 dev: &nil_rs_bindings::nv_device_info,
630 base_address: u64,
631 format: Format,
632 num_elements: u32,
633 desc_out: &mut [u32; 8],
634 ) {
635 if dev.cls_eng3d >= MAXWELL_A {
636 nvb097_nil_fill_buffer_tic(base_address, format, num_elements, desc_out)
637 } else if dev.cls_eng3d >= FERMI_A {
638 nv9097_nil_fill_buffer_tic(base_address, format, num_elements, desc_out)
639 } else {
640 panic!("Tesla and older not supported");
641 }
642 }
643
644 pub const ZERO_SWIZZLE: [nil_rs_bindings::pipe_swizzle; 4] = [
645 nil_rs_bindings::PIPE_SWIZZLE_0,
646 nil_rs_bindings::PIPE_SWIZZLE_0,
647 nil_rs_bindings::PIPE_SWIZZLE_0,
648 nil_rs_bindings::PIPE_SWIZZLE_0,
649 ];
650
nv9097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8])651 fn nv9097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8]) {
652 *desc_out = [0u32; 8];
653 let mut th = BitMutView::new(desc_out);
654
655 th.set_field(cl9097::TEXHEADV2_USE_TEXTURE_HEADER_VERSION2, true);
656 let format = Format::try_from(PIPE_FORMAT_R8_UNORM).unwrap();
657 nvb097_set_th_bl_0(&mut th, &format, ZERO_SWIZZLE);
658
659 th.set_field(cl9097::TEXHEADV2_OFFSET_LOWER, zero_page_address as u32);
660 th.set_field(
661 cl9097::TEXHEADV2_OFFSET_UPPER,
662 (zero_page_address >> 32) as u32,
663 );
664
665 set_enum!(th, cl9097, TEXHEADV2_MEMORY_LAYOUT, BLOCKLINEAR);
666 set_enum!(th, cl9097, TEXHEADV2_TEXTURE_TYPE, TWO_D_ARRAY);
667 th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
668
669 th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, 1_u8);
670 th.set_field(cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL, 0_u8);
671 }
672
nvb097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8])673 fn nvb097_fill_null_tic(zero_page_address: u64, desc_out: &mut [u32; 8]) {
674 *desc_out = [0u32; 8];
675 let mut th = BitMutView::new(desc_out);
676
677 let format = Format::try_from(PIPE_FORMAT_R8_UNORM).unwrap();
678 nvb097_set_th_bl_0(&mut th, &format, ZERO_SWIZZLE);
679
680 set_enum!(th, clb097, TEXHEAD_BL_HEADER_VERSION, SELECT_BLOCKLINEAR);
681
682 let addr = BitView::new(&zero_page_address);
683 assert!(addr.get_bit_range_u64(0..9) == 0);
684 th.set_field(
685 clb097::TEXHEAD_BL_ADDRESS_BITS31TO9,
686 addr.get_bit_range_u64(9..32),
687 );
688 th.set_field(
689 clb097::TEXHEAD_BL_ADDRESS_BITS47TO32,
690 addr.get_bit_range_u64(32..48),
691 );
692 assert!(addr.get_bit_range_u64(48..64) == 0);
693
694 set_enum!(th, clb097, TEXHEAD_BL_TEXTURE_TYPE, TWO_D_ARRAY);
695 set_enum!(th, clb097, TEXHEAD_BL_BORDER_SIZE, BORDER_SAMPLER_COLOR);
696 th.set_field(cl9097::TEXHEADV2_NORMALIZED_COORDS, true);
697
698 th.set_field(cl9097::TEXHEADV2_RES_VIEW_MIN_MIP_LEVEL, 1_u8);
699 th.set_field(cl9097::TEXHEADV2_RES_VIEW_MAX_MIP_LEVEL, 0_u8);
700
701 // This is copied from the D3D12 driver. I have no idea what these bits do
702 // or if they even do anything.
703 th.set_field(clb097::TEXHEAD_BL_RESERVED4A, 0x4_u8);
704 th.set_field(clb097::TEXHEAD_BL_RESERVED7Y, 0x80_u8);
705 }
706
fill_null_tic( dev: &nil_rs_bindings::nv_device_info, zero_page_address: u64, desc_out: &mut [u32; 8], )707 pub fn fill_null_tic(
708 dev: &nil_rs_bindings::nv_device_info,
709 zero_page_address: u64,
710 desc_out: &mut [u32; 8],
711 ) {
712 if dev.cls_eng3d >= VOLTA_A {
713 // On Volta+, we can just fill with zeros
714 *desc_out = [0; 8]
715 } else if dev.cls_eng3d >= MAXWELL_A {
716 nvb097_fill_null_tic(zero_page_address, desc_out)
717 } else if dev.cls_eng3d >= FERMI_A {
718 nv9097_fill_null_tic(zero_page_address, desc_out)
719 } else {
720 panic!("Tesla and older not supported");
721 }
722 }
723
724 #[no_mangle]
nil_fill_null_tic( dev: &nil_rs_bindings::nv_device_info, zero_page_address: u64, desc_out: &mut [u32; 8], )725 pub extern "C" fn nil_fill_null_tic(
726 dev: &nil_rs_bindings::nv_device_info,
727 zero_page_address: u64,
728 desc_out: &mut [u32; 8],
729 ) {
730 fill_null_tic(dev, zero_page_address, desc_out);
731 }
732