// Copyright © 2024 Valve Corp. and Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::extent::{units, Extent4D, Offset4D};
use crate::tiling::Tiling;

use std::ffi::c_void;
use std::ops::Range;

// This file is dedicated to the internal tiling layout, mainly in the context
// of CPU-based tiled memcpy implementations (and helpers) for VK_EXT_host_image_copy
//
// Work here is based on isl_tiled_memcpy, fd6_tiled_memcpy, old work by Rebecca Mckeever,
// and https://fgiesen.wordpress.com/2011/01/17/texture-tiling-and-swizzling/
//
// On NVIDIA, the tiling system is a two-tier one: images are first tiled into
// a grid of rows of tiles (called "Blocks") with one or more columns:
//
// +----------+----------+----------+----------+
// | Block 0  | Block 1  | Block 2  | Block 3  |
// +----------+----------+----------+----------+
// | Block 4  | Block 5  | Block 6  | Block 7  |
// +----------+----------+----------+----------+
// | Block 8  | Block 9  | Block 10 | Block 11 |
// +----------+----------+----------+----------+
//
// The blocks themselves are ordered linearly, as can be seen above, which is
// where the "Block Linear" name for NVIDIA's tiling scheme comes from.
//
// For 3D images, each block continues in the Z direction such that tiles
// contain multiple Z slices. If the image depth is greater than the
// block depth, there will be more than one layer of blocks, where a layer is
// made up of one or more Z slices. For example, if the above tile pattern were
// the first layer of a multilayer arrangement, the second layer would be:
//
// +----------+----------+----------+----------+
// | Block 12 | Block 13 | Block 14 | Block 15 |
// +----------+----------+----------+----------+
// | Block 16 | Block 17 | Block 18 | Block 19 |
// +----------+----------+----------+----------+
// | Block 20 | Block 21 | Block 22 | Block 23 |
// +----------+----------+----------+----------+
//
// The number of rows, columns, and layers of tiles can thus be deduced to be:
//    rows    >= ceiling(image_height / block_height)
//    columns >= ceiling(image_width  / block_width)
//    layers  >= ceiling(image_depth  / block_depth)
//
// where block_width is a constant 64B (except for sparse) and block_height
// can be either 8 or 16 GOBs tall (more on GOBs below). For us, block_depth
// is one for now.
//
// The >= is in case the blocks around the edges are partial.
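//
// As a worked example (hypothetical numbers, not from any spec): with
// block_height = 8 GOBs (i.e. 64 rows) and block_depth = 1, a 2D image whose
// rows are 1024B wide and which is 256 rows tall needs at least
//    columns >= ceiling(1024 / 64) = 16
//    rows    >= ceiling(256  / 64) = 4
//    layers  >= ceiling(1    / 1)  = 1
// i.e. at least 64 blocks in total.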
//
// Now comes the second tier. Each block is composed of GOBs (Groups of Bytes)
// arranged in ascending order in a single column:
//
// +---------------------------+
// |           GOB 0           |
// +---------------------------+
// |           GOB 1           |
// +---------------------------+
// |           GOB 2           |
// +---------------------------+
// |           GOB 3           |
// +---------------------------+
//
// The number of GOBs in a full block is
//    block_height * block_depth
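// (For example, assuming block_height = 8 GOBs and block_depth = 1, a full
// block is 8 GOBs, i.e. 8 * 512B = 4KiB.)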
//
// An Ampere GOB is 512 bytes, arranged in a 64x8 layout and split into Sectors.
// Each Sector is 32 bytes, arranged in a 16Bx2 layout (i.e., two 16B lines on
// top of each other). The GOB is then arranged into two halves, each 2 sectors
// wide by 4 sectors tall, leading to a 4x4 grid of sectors:
//
// +----------+----------+----------+----------+
// | Sector 0 | Sector 1 | Sector 0 | Sector 1 |
// +----------+----------+----------+----------+
// | Sector 2 | Sector 3 | Sector 2 | Sector 3 |
// +----------+----------+----------+----------+
// | Sector 4 | Sector 5 | Sector 4 | Sector 5 |
// +----------+----------+----------+----------+
// | Sector 6 | Sector 7 | Sector 6 | Sector 7 |
// +----------+----------+----------+----------+
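//
// (Sanity check on the sizes: a 4x4 grid of 32B sectors is 16 * 32B = 512B,
// which matches the 64B x 8-row GOB.)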
//
// From the pixel address equations given in the Orin manual, we arrived at
// the following bit interleave pattern for the pixel address:
//
//      b8 b7 b6 b5 b4 b3 b2 b1 b0
//      --------------------------
//      x5 y2 y1 x4 y0 x3 x2 x1 x0
//
// Which would look something like this:
// fn get_pixel_offset(
//      x: usize,
//      y: usize,
//  ) -> usize {
//      (x & 15)       |
//      (y & 1)  << 4  |
//      (x & 16) << 1  |
//      (y & 2)  << 5  |
//      (y & 4)  << 5  |
//      (x & 32) << 3
//  }
//
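// As a sanity check of the interleave pattern: plugging in x = 17, y = 3 gives
//      (17 & 15) | (3 & 1) << 4 | (17 & 16) << 1 | (3 & 2) << 5
//    = 0x01 + 0x10 + 0x20 + 0x40 = 0x71
// (the y2 and x5 terms are zero), which matches the offset for_each_16b()
// below produces for that position: the 16B line at x = 16, y = 3 starts at
// byte 0x70, plus one byte in.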

// Our implementation works by splitting an image into tiles; each tile is then
// broken into its GOBs, and finally each GOB into sectors, where each sector
// is copied into its position.
//
// For code sharing and cleanliness, we write everything to be very generic,
// so as to be shared between the Linear <-> Tiled and Tiled <-> Linear paths,
// and (ab)use Rust's traits to specialize the last level
// (copy_gob/copy_whole_gob) for a particular direction.
//
// The copy_x and copy_whole_x distinction is made because, if we can guarantee
// that tiles/GOBs are whole and aligned, we can skip all bounds checking and
// copy things in fast and tight loops.
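//
// In terms of the code below, the call structure is roughly:
//
//    nil_copy_linear_to_tiled / nil_copy_tiled_to_linear
//      -> copy_tiled()                   for each tile touching the region
//        -> copy_tile()                  for each GOB touching the tile
//          -> CopyGOB::copy_gob() or copy_whole_gob()
//            -> Copy16B::copy() / copy_16b()   for each (partial) 16B line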

/// Copies a GOB
///
/// This trait should be implemented twice for each GOB type, once for
/// tiled-to-linear and once for linear-to-tiled.  This allows the rest of
/// the tiled copies to be implemented in a generic way.
trait CopyGOB {
    const GOB_EXTENT_B: Extent4D<units::Bytes>;
    const X_DIVISOR: u32;

    unsafe fn copy_gob(
        tiled: usize,
        linear: LinearPointer,
        start: Offset4D<units::Bytes>,
        end: Offset4D<units::Bytes>,
    );

    // No bounding box for this one
    unsafe fn copy_whole_gob(tiled: usize, linear: LinearPointer) {
        Self::copy_gob(
            tiled,
            linear,
            Offset4D::new(0, 0, 0, 0),
            Offset4D::new(0, 0, 0, 0) + Self::GOB_EXTENT_B,
        );
    }
}

/// Copies at most 16B of data to/from linear
trait Copy16B {
    const X_DIVISOR: u32;

    unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize);
    unsafe fn copy_16b(tiled: *mut [u8; 16], linear: *mut [u8; 16]) {
        Self::copy(tiled as *mut _, linear as *mut _, 16);
    }
}

struct CopyGOBTuring2D<C: Copy16B> {
    phantom: std::marker::PhantomData<C>,
}

impl<C: Copy16B> CopyGOBTuring2D<C> {
    // Calls f(offset, x, y) once for each 16B line in the GOB, where offset is
    // the byte offset of the line within the 512B tiled GOB and (x, y) is its
    // position within the 64x8 linear layout of the GOB.
    fn for_each_16b(mut f: impl FnMut(u32, u32, u32)) {
        for i in 0..2 {
            f(i * 0x100 + 0x00, i * 32 + 0, 0);
            f(i * 0x100 + 0x10, i * 32 + 0, 1);
            f(i * 0x100 + 0x20, i * 32 + 0, 2);
            f(i * 0x100 + 0x30, i * 32 + 0, 3);

            f(i * 0x100 + 0x40, i * 32 + 16, 0);
            f(i * 0x100 + 0x50, i * 32 + 16, 1);
            f(i * 0x100 + 0x60, i * 32 + 16, 2);
            f(i * 0x100 + 0x70, i * 32 + 16, 3);

            f(i * 0x100 + 0x80, i * 32 + 0, 4);
            f(i * 0x100 + 0x90, i * 32 + 0, 5);
            f(i * 0x100 + 0xa0, i * 32 + 0, 6);
            f(i * 0x100 + 0xb0, i * 32 + 0, 7);

            f(i * 0x100 + 0xc0, i * 32 + 16, 4);
            f(i * 0x100 + 0xd0, i * 32 + 16, 5);
            f(i * 0x100 + 0xe0, i * 32 + 16, 6);
            f(i * 0x100 + 0xf0, i * 32 + 16, 7);
        }
    }
}

impl<C: Copy16B> CopyGOB for CopyGOBTuring2D<C> {
    const GOB_EXTENT_B: Extent4D<units::Bytes> = Extent4D::new(64, 8, 1, 1);
    const X_DIVISOR: u32 = C::X_DIVISOR;

    unsafe fn copy_gob(
        tiled: usize,
        linear: LinearPointer,
        start: Offset4D<units::Bytes>,
        end: Offset4D<units::Bytes>,
    ) {
        Self::for_each_16b(|offset, x, y| {
            if y >= start.y && y < end.y {
                let tiled = tiled + (offset as usize);
                let linear = linear.at(Offset4D::new(x, y, 0, 0));
                if x >= start.x && x + 16 <= end.x {
                    C::copy_16b(tiled as *mut _, linear as *mut _);
                } else if x + 16 >= start.x && x < end.x {
                    let start = (std::cmp::max(x, start.x) - x) as usize;
                    let end = std::cmp::min(end.x - x, 16) as usize;
                    C::copy(
                        (tiled + start) as *mut _,
                        (linear + start) as *mut _,
                        end - start,
                    );
                }
            }
        });
    }

    unsafe fn copy_whole_gob(tiled: usize, linear: LinearPointer) {
        Self::for_each_16b(|offset, x, y| {
            let tiled = tiled + (offset as usize);
            let linear = linear.at(Offset4D::new(x, y, 0, 0));
            C::copy_16b(tiled as *mut _, linear as *mut _);
        });
    }
}

/// Expands `start..end` to the smallest containing range whose endpoints are
/// aligned to `align` (which must be a power of two).
fn aligned_range(start: u32, end: u32, align: u32) -> Range<u32> {
    debug_assert!(align.is_power_of_two());
    let align_1 = align - 1;
    (start & !align_1)..((end + align_1) & !align_1)
}

/// Clamps `whole` to the chunk `[chunk_start, chunk_start + chunk_len)` and
/// returns the result relative to `chunk_start`.
fn chunk_range(
    whole: Range<u32>,
    chunk_start: u32,
    chunk_len: u32,
) -> Range<u32> {
    debug_assert!(chunk_start < whole.end);
    let start = if chunk_start < whole.start {
        whole.start - chunk_start
    } else {
        0
    };
    let end = std::cmp::min(whole.end - chunk_start, chunk_len);
    start..end
}

/// Iterates over the `chunk`-sized pieces of the box `start..end`, calling
/// `f(chunk_start, start, end)` for each piece, where `chunk_start` is the
/// aligned offset of the chunk and `start..end` is the covered region relative
/// to the chunk's origin.
fn for_each_extent4d<U>(
    start: Offset4D<U>,
    end: Offset4D<U>,
    chunk: Extent4D<U>,
    mut f: impl FnMut(Offset4D<U>, Offset4D<U>, Offset4D<U>),
) {
    debug_assert!(chunk.width.is_power_of_two());
    debug_assert!(chunk.height.is_power_of_two());
    debug_assert!(chunk.depth.is_power_of_two());
    debug_assert!(chunk.array_len == 1);

    debug_assert!(start.a == 0);
    debug_assert!(end.a == 1);

    let x_range = aligned_range(start.x, end.x, chunk.width);
    let y_range = aligned_range(start.y, end.y, chunk.height);
    let z_range = aligned_range(start.z, end.z, chunk.depth);

    for z in z_range.step_by(chunk.depth as usize) {
        let chunk_z = chunk_range(start.z..end.z, z, chunk.depth);
        for y in y_range.clone().step_by(chunk.height as usize) {
            let chunk_y = chunk_range(start.y..end.y, y, chunk.height);
            for x in x_range.clone().step_by(chunk.width as usize) {
                let chunk_x = chunk_range(start.x..end.x, x, chunk.width);
                let chunk_start = Offset4D::new(x, y, z, start.a);
                let start = Offset4D::new(
                    chunk_x.start,
                    chunk_y.start,
                    chunk_z.start,
                    start.a,
                );
                let end =
                    Offset4D::new(chunk_x.end, chunk_y.end, chunk_z.end, end.a);
                f(chunk_start, start, end);
            }
        }
    }
}

/// Like for_each_extent4d(), but requires `start` and `end` to be aligned to
/// `chunk` and only passes each chunk's offset to `f`.
fn for_each_extent4d_aligned<U>(
    start: Offset4D<U>,
    end: Offset4D<U>,
    chunk: Extent4D<U>,
    mut f: impl FnMut(Offset4D<U>),
) {
    debug_assert!(start.x % chunk.width == 0);
    debug_assert!(start.y % chunk.height == 0);
    debug_assert!(start.z % chunk.depth == 0);
    debug_assert!(start.a == 0);

    debug_assert!(end.x % chunk.width == 0);
    debug_assert!(end.y % chunk.height == 0);
    debug_assert!(end.z % chunk.depth == 0);
    debug_assert!(end.a == 1);

    debug_assert!(chunk.width.is_power_of_two());
    debug_assert!(chunk.height.is_power_of_two());
    debug_assert!(chunk.depth.is_power_of_two());
    debug_assert!(chunk.array_len == 1);

    for z in (start.z..end.z).step_by(chunk.depth as usize) {
        for y in (start.y..end.y).step_by(chunk.height as usize) {
            for x in (start.x..end.x).step_by(chunk.width as usize) {
                f(Offset4D::new(x, y, z, start.a));
            }
        }
    }
}

struct BlockPointer {
    pointer: usize,
    x_mul: usize,
    y_mul: usize,
    z_mul: usize,
    #[cfg(debug_assertions)]
    bl_extent: Extent4D<units::Bytes>,
}

impl BlockPointer {
    fn new(
        pointer: usize,
        bl_extent: Extent4D<units::Bytes>,
        extent: Extent4D<units::Bytes>,
    ) -> BlockPointer {
        debug_assert!(bl_extent.array_len == 1);

        debug_assert!(extent.width % bl_extent.width == 0);
        debug_assert!(extent.height % bl_extent.height == 0);
        debug_assert!(extent.depth % bl_extent.depth == 0);
        debug_assert!(extent.array_len == 1);

        BlockPointer {
            pointer,
            // We assume that offsets passed to at() are aligned to bl_extent so
            //
            //    x_bl * bl_size_B
            //  = (x / bl_extent.width) * bl_size_B
            //  = x * (bl_size_B / bl_extent.width)
            //  = x * bl_extent.height * bl_extent.depth
            x_mul: (bl_extent.height as usize) * (bl_extent.depth as usize),

            //    y_bl * width_bl * bl_size_B
            //  = (y / bl_extent.height) * width_bl * bl_size_B
            //  = y * (bl_size_B / bl_extent.height) * width_bl
            //  = y * bl_extent.width * bl_extent.depth * width_bl
            //  = y * (width_bl * bl_extent.width) * bl_extent.depth
            //  = y * extent.width * bl_extent.depth
            y_mul: (extent.width as usize) * (bl_extent.depth as usize),

            //    z_bl * width_bl * height_bl * bl_size_B
            //  = (z / bl_extent.depth) * width_bl * height_bl * bl_size_B
            //  = z * (bl_size_B / bl_extent.depth) * width_bl * height_bl
            //  = z * (bl_extent.width * bl_extent.height) * width_bl * height_bl
            //  = z * width_bl * bl_extent.width * height_bl * bl_extent.height
            //  = z * extent.width * extent.height
            z_mul: (extent.width as usize) * (extent.height as usize),

            #[cfg(debug_assertions)]
            bl_extent,
        }
    }

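    // As a concrete (hypothetical) example: with bl_extent = 64x8x1 and
    // extent = 256x64x1, we get x_mul = 8, y_mul = 256, and z_mul = 16384.
    // at() then maps the aligned byte offset (64, 8, 0) to
    //    64 * 8 + 8 * 256 = 2560
    // bytes past `pointer`, i.e. block index 5 (row 1, column 1 of the
    // 4-block-wide grid) times the 512B block size.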
    #[inline]
    fn at(&self, offset: Offset4D<units::Bytes>) -> usize {
        #[cfg(debug_assertions)]
        {
            debug_assert!(offset.x % self.bl_extent.width == 0);
            debug_assert!(offset.y % self.bl_extent.height == 0);
            debug_assert!(offset.z % self.bl_extent.depth == 0);
            debug_assert!(offset.a == 0);
        }

        self.pointer
            + (offset.z as usize) * self.z_mul
            + (offset.y as usize) * self.y_mul
            + (offset.x as usize) * self.x_mul
    }
}

#[derive(Copy, Clone)]
struct LinearPointer {
    pointer: usize,
    x_shift: u32,
    row_stride_B: usize,
    plane_stride_B: usize,
}

impl LinearPointer {
    fn new(
        pointer: usize,
        x_divisor: u32,
        row_stride_B: usize,
        plane_stride_B: usize,
    ) -> LinearPointer {
        debug_assert!(x_divisor.is_power_of_two());
        LinearPointer {
            pointer,
            x_shift: x_divisor.ilog2(),
            row_stride_B,
            plane_stride_B,
        }
    }

    fn x_divisor(&self) -> u32 {
        1 << self.x_shift
    }

    /// Backs the pointer up by `offset`, so that `at(offset)` (or
    /// `offset(offset)`) on the result lands back on the original pointer.
    #[inline]
    fn reverse(self, offset: Offset4D<units::Bytes>) -> LinearPointer {
        debug_assert!(offset.x % (1 << self.x_shift) == 0);
        debug_assert!(offset.a == 0);
        LinearPointer {
            pointer: self
                .pointer
                .wrapping_sub((offset.z as usize) * self.plane_stride_B)
                .wrapping_sub((offset.y as usize) * self.row_stride_B)
                .wrapping_sub((offset.x >> self.x_shift) as usize),
            x_shift: self.x_shift,
            row_stride_B: self.row_stride_B,
            plane_stride_B: self.plane_stride_B,
        }
    }

    #[inline]
    fn at(self, offset: Offset4D<units::Bytes>) -> usize {
        debug_assert!(offset.x % (1 << self.x_shift) == 0);
        debug_assert!(offset.a == 0);
        self.pointer
            .wrapping_add((offset.z as usize) * self.plane_stride_B)
            .wrapping_add((offset.y as usize) * self.row_stride_B)
            .wrapping_add((offset.x >> self.x_shift) as usize)
    }

    #[inline]
    fn offset(self, offset: Offset4D<units::Bytes>) -> LinearPointer {
        LinearPointer {
            pointer: self.at(offset),
            x_shift: self.x_shift,
            row_stride_B: self.row_stride_B,
            plane_stride_B: self.plane_stride_B,
        }
    }
}

/// Copies the byte region `start..end` (relative to the tile's origin) of the
/// single tile at `tile_ptr`, GOB by GOB.
unsafe fn copy_tile<CG: CopyGOB>(
    tiling: Tiling,
    tile_ptr: usize,
    linear: LinearPointer,
    start: Offset4D<units::Bytes>,
    end: Offset4D<units::Bytes>,
) {
    debug_assert!(linear.x_divisor() == CG::X_DIVISOR);
    debug_assert!(tiling.gob_type.extent_B() == CG::GOB_EXTENT_B);

    let tile_extent_B = tiling.extent_B();
    let tile_ptr = BlockPointer::new(tile_ptr, CG::GOB_EXTENT_B, tile_extent_B);

    if start.is_aligned_to(CG::GOB_EXTENT_B)
        && end.is_aligned_to(CG::GOB_EXTENT_B)
    {
        for_each_extent4d_aligned(start, end, CG::GOB_EXTENT_B, |gob| {
            CG::copy_whole_gob(tile_ptr.at(gob), linear.offset(gob));
        });
    } else {
        for_each_extent4d(start, end, CG::GOB_EXTENT_B, |gob, start, end| {
            let tiled = tile_ptr.at(gob);
            let linear = linear.offset(gob);
            if start == Offset4D::new(0, 0, 0, 0)
                && end == Offset4D::new(0, 0, 0, 0) + CG::GOB_EXTENT_B
            {
                CG::copy_whole_gob(tiled, linear);
            } else {
                CG::copy_gob(tiled, linear, start, end);
            }
        });
    }
}

/// Copies the byte region `start..end` of a tiled image level, tile by tile.
/// On entry, `linear` points at the linear data corresponding to `start`.
unsafe fn copy_tiled<CG: CopyGOB>(
    tiling: Tiling,
    level_extent_B: Extent4D<units::Bytes>,
    level_tiled_ptr: usize,
    linear: LinearPointer,
    start: Offset4D<units::Bytes>,
    end: Offset4D<units::Bytes>,
) {
    let tile_extent_B = tiling.extent_B();
    let level_extent_B = level_extent_B.align(&tile_extent_B);

    // Back up the linear pointer so it also points at the start of the level.
    // This way, every step of the iteration can assume that both pointers
    // point to the start chunk of the level, tile, or GOB.
    let linear = linear.reverse(start);

    let level_tiled_ptr =
        BlockPointer::new(level_tiled_ptr, tile_extent_B, level_extent_B);

    for_each_extent4d(start, end, tile_extent_B, |tile, start, end| {
        let tile_ptr = level_tiled_ptr.at(tile);
        let linear = linear.offset(tile);
        copy_tile::<CG>(tiling, tile_ptr, linear, start, end);
    });
}

struct RawCopyToTiled {}

impl Copy16B for RawCopyToTiled {
    const X_DIVISOR: u32 = 1;

    unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize) {
        // This is backwards from memcpy
        std::ptr::copy_nonoverlapping(linear, tiled, bytes);
    }
}

struct RawCopyToLinear {}

impl Copy16B for RawCopyToLinear {
    const X_DIVISOR: u32 = 1;

    unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize) {
        // This is backwards from memcpy
        std::ptr::copy_nonoverlapping(tiled, linear, bytes);
    }
}

/// Copies a region of a linear image into a tiled (block-linear) image level.
///
/// # Safety
///
/// `tiled_dst` and `linear_src` must be valid for the entire region described
/// by `offset_B`, `extent_B`, and the linear strides.
#[no_mangle]
pub unsafe extern "C" fn nil_copy_linear_to_tiled(
    tiled_dst: *mut c_void,
    level_extent_B: Extent4D<units::Bytes>,
    linear_src: *const c_void,
    linear_row_stride_B: usize,
    linear_plane_stride_B: usize,
    offset_B: Offset4D<units::Bytes>,
    extent_B: Extent4D<units::Bytes>,
    tiling: &Tiling,
) {
    let end_B = offset_B + extent_B;

    let linear_src = linear_src as usize;
    let tiled_dst = tiled_dst as usize;
    let linear_pointer = LinearPointer::new(
        linear_src,
        1,
        linear_row_stride_B,
        linear_plane_stride_B,
    );

    copy_tiled::<CopyGOBTuring2D<RawCopyToTiled>>(
        *tiling,
        level_extent_B,
        tiled_dst,
        linear_pointer,
        offset_B,
        end_B,
    );
}

/// Copies a region of a tiled (block-linear) image level into a linear image.
///
/// # Safety
///
/// `linear_dst` and `tiled_src` must be valid for the entire region described
/// by `offset_B`, `extent_B`, and the linear strides.
#[no_mangle]
pub unsafe extern "C" fn nil_copy_tiled_to_linear(
    linear_dst: *mut c_void,
    linear_row_stride_B: usize,
    linear_plane_stride_B: usize,
    tiled_src: *const c_void,
    level_extent_B: Extent4D<units::Bytes>,
    offset_B: Offset4D<units::Bytes>,
    extent_B: Extent4D<units::Bytes>,
    tiling: &Tiling,
) {
    let mut end_B = offset_B + extent_B;
    end_B.a = 1;
    let linear_dst = linear_dst as usize;
    let tiled_src = tiled_src as usize;
    let linear_pointer = LinearPointer::new(
        linear_dst,
        1,
        linear_row_stride_B,
        linear_plane_stride_B,
    );

    copy_tiled::<CopyGOBTuring2D<RawCopyToLinear>>(
        *tiling,
        level_extent_B,
        tiled_src,
        linear_pointer,
        offset_B,
        end_B,
    );
}
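
// A couple of quick sanity checks for the pure range helpers above; the
// expected values follow directly from those helpers' definitions.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn aligned_range_widens_to_alignment() {
        assert_eq!(aligned_range(5, 23, 8), 0..24);
        assert_eq!(aligned_range(0, 16, 16), 0..16);
    }

    #[test]
    fn chunk_range_clamps_to_chunk() {
        // The chunk starting at 0 (8 bytes long) covers bytes 5..8 of 5..23.
        assert_eq!(chunk_range(5..23, 0, 8), 5..8);
        // The chunk starting at 16 covers bytes 0..7 (the range ends at 23).
        assert_eq!(chunk_range(5..23, 16, 8), 0..7);
    }
}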