// Copyright © 2024 Valve Corp. and Collabora, Ltd.
// SPDX-License-Identifier: MIT

use crate::extent::{units, Extent4D, Offset4D};
use crate::tiling::Tiling;

use std::ffi::c_void;
use std::ops::Range;

// This file is dedicated to the internal tiling layout, mainly in the context
// of CPU-based tiled memcpy implementations (and helpers) for
// VK_EXT_host_image_copy.
//
// Work here is based on isl_tiled_memcpy, fd6_tiled_memcpy, old work by
// Rebecca Mckeever, and
// https://fgiesen.wordpress.com/2011/01/17/texture-tiling-and-swizzling/
//
// On NVIDIA, the tiling scheme has two tiers. Images are first divided into a
// grid of tiles (called "Blocks"), laid out in one or more rows and columns:
//
// +----------+----------+----------+----------+
// | Block 0  | Block 1  | Block 2  | Block 3  |
// +----------+----------+----------+----------+
// | Block 4  | Block 5  | Block 6  | Block 7  |
// +----------+----------+----------+----------+
// | Block 8  | Block 9  | Block 10 | Block 11 |
// +----------+----------+----------+----------+
//
// The blocks themselves are ordered linearly, as can be seen above, which is
// where the "Block Linear" name for NVIDIA's tiling scheme comes from.
//
// For 3D images, each block continues in the Z direction such that tiles
// contain multiple Z slices. If the image depth is greater than the block
// depth, there will be more than one layer of blocks, where a layer is made
// up of one or more Z slices. For example, if the above tile pattern were the
// first layer of a multilayer arrangement, the second layer would be:
//
// +----------+----------+----------+----------+
// | Block 12 | Block 13 | Block 14 | Block 15 |
// +----------+----------+----------+----------+
// | Block 16 | Block 17 | Block 18 | Block 19 |
// +----------+----------+----------+----------+
// | Block 20 | Block 21 | Block 22 | Block 23 |
// +----------+----------+----------+----------+
//
// The number of rows, columns, and layers of tiles can thus be deduced to be:
//    rows    >= ceiling(image_height / block_height)
//    columns >= ceiling(image_width / block_width)
//    layers  >= ceiling(image_depth / block_depth)
//
// Where block_width is a constant 64B (except for sparse) and block_height
// can be either 8 or 16 GOBs tall (more on GOBs below). For us, block_depth
// is one for now.
//
// The >= is there in case the blocks around the edges are partial.
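//
// For example, a hypothetical 300x100x1 (bytes x rows x slices) level with
// blocks that are 64B wide, 16 GOBs (128 rows) tall, and 1 slice deep would
// need:
//    rows    >= ceiling(100 / 128) = 1
//    columns >= ceiling(300 / 64)  = 5
//    layers  >= ceiling(1   / 1)   = 1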
//
// Now comes the second tier. Each block is composed of GOBs (Groups of Bytes)
// arranged in ascending order in a single column:
//
// +---------------------------+
// |           GOB 0           |
// +---------------------------+
// |           GOB 1           |
// +---------------------------+
// |           GOB 2           |
// +---------------------------+
// |           GOB 3           |
// +---------------------------+
//
// The number of GOBs in a full block is
//    block_height * block_depth
//
// An Ampere GOB is 512 bytes, arranged in a 64x8 byte layout and split into
// Sectors. Each Sector is 32 bytes, arranged as a 16x2 layout (i.e., two 16B
// lines on top of each other). The GOB is then split into two halves, each of
// which is 2 sectors wide by 4 sectors tall, giving a 4x4 grid of sectors:
//
// +----------+----------+----------+----------+
// | Sector 0 | Sector 1 | Sector 0 | Sector 1 |
// +----------+----------+----------+----------+
// | Sector 2 | Sector 3 | Sector 2 | Sector 3 |
// +----------+----------+----------+----------+
// | Sector 4 | Sector 5 | Sector 4 | Sector 5 |
// +----------+----------+----------+----------+
// | Sector 6 | Sector 7 | Sector 6 | Sector 7 |
// +----------+----------+----------+----------+
//
// From the given pixel address equations in the Orin manual, we arrived at
// the following bit interleave pattern for the pixel address:
//
// b8 b7 b6 b5 b4 b3 b2 b1 b0
// --------------------------
// x5 y2 y1 x4 y0 x3 x2 x1 x0
//
// Which would look something like this:
//    fn get_pixel_offset(
//        x: usize,
//        y: usize,
//    ) -> usize {
//        (x & 15) |
//        (y & 1) << 4 |
//        (x & 16) << 1 |
//        (y & 2) << 5 |
//        (y & 4) << 5 |
//        (x & 32) << 3
//    }
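//
// As a quick sanity check of the pattern above: for (x, y) = (17, 3) we have
// x = 0b010001 and y = 0b011, so x0 = y0 = x4 = y1 = 1 and everything else is
// 0, giving an offset of 0b0_0111_0001 = 0x71.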
//

// The way our implementation works is by splitting an image into tiles; each
// tile is then broken into its GOBs, and finally each GOB into sectors, where
// each sector is copied into its position.
//
// For code sharing and cleanliness, we write everything to be very generic,
// so that it can be shared between the Linear <-> Tiled and Tiled <-> Linear
// paths, and (ab)use Rust's traits to specialize the last level
// (copy_gob/copy_whole_gob) for a particular direction.
//
// The copy_x and copy_whole_x distinction is made because if we can guarantee
// that tiles/GOBs are whole and aligned, we can skip all bounds checking and
// copy things in fast and tight loops.

/// Copies a GOB
///
/// This trait should be implemented twice for each GOB type, once for
/// tiled-to-linear and once for linear-to-tiled. This allows the rest of the
/// tiled copy code to be implemented generically.
trait CopyGOB {
    const GOB_EXTENT_B: Extent4D<units::Bytes>;
    const X_DIVISOR: u32;

    unsafe fn copy_gob(
        tiled: usize,
        linear: LinearPointer,
        start: Offset4D<units::Bytes>,
        end: Offset4D<units::Bytes>,
    );

    // No bounding box for this one
    unsafe fn copy_whole_gob(tiled: usize, linear: LinearPointer) {
        Self::copy_gob(
            tiled,
            linear,
            Offset4D::new(0, 0, 0, 0),
            Offset4D::new(0, 0, 0, 0) + Self::GOB_EXTENT_B,
        );
    }
}

/// Copies at most 16B of data to/from linear
trait Copy16B {
    const X_DIVISOR: u32;

    unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize);
    unsafe fn copy_16b(tiled: *mut [u8; 16], linear: *mut [u8; 16]) {
        Self::copy(tiled as *mut _, linear as *mut _, 16);
    }
}

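/// [`CopyGOB`] implementation for the 2D (64x8) Turing+ GOB layout, generic
/// over a [`Copy16B`] so the same GOB walk serves both copy directions.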
struct CopyGOBTuring2D<C: Copy16B> {
    phantom: std::marker::PhantomData<C>,
}

impl<C: Copy16B> CopyGOBTuring2D<C> {
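    // Calls f(gob_offset_B, x, y) once for each 16-byte line of the 64x8 GOB,
    // where gob_offset_B is that line's byte offset from the start of the GOB
    // and (x, y) is its position within the GOB. Note that the first 256B of
    // the GOB cover x = 0..32 and the second 256B cover x = 32..64.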
    fn for_each_16b(mut f: impl FnMut(u32, u32, u32)) {
        for i in 0..2 {
            f(i * 0x100 + 0x00, i * 32 + 0, 0);
            f(i * 0x100 + 0x10, i * 32 + 0, 1);
            f(i * 0x100 + 0x20, i * 32 + 0, 2);
            f(i * 0x100 + 0x30, i * 32 + 0, 3);

            f(i * 0x100 + 0x40, i * 32 + 16, 0);
            f(i * 0x100 + 0x50, i * 32 + 16, 1);
            f(i * 0x100 + 0x60, i * 32 + 16, 2);
            f(i * 0x100 + 0x70, i * 32 + 16, 3);

            f(i * 0x100 + 0x80, i * 32 + 0, 4);
            f(i * 0x100 + 0x90, i * 32 + 0, 5);
            f(i * 0x100 + 0xa0, i * 32 + 0, 6);
            f(i * 0x100 + 0xb0, i * 32 + 0, 7);

            f(i * 0x100 + 0xc0, i * 32 + 16, 4);
            f(i * 0x100 + 0xd0, i * 32 + 16, 5);
            f(i * 0x100 + 0xe0, i * 32 + 16, 6);
            f(i * 0x100 + 0xf0, i * 32 + 16, 7);
        }
    }
}

impl<C: Copy16B> CopyGOB for CopyGOBTuring2D<C> {
    const GOB_EXTENT_B: Extent4D<units::Bytes> = Extent4D::new(64, 8, 1, 1);
    const X_DIVISOR: u32 = C::X_DIVISOR;

    unsafe fn copy_gob(
        tiled: usize,
        linear: LinearPointer,
        start: Offset4D<units::Bytes>,
        end: Offset4D<units::Bytes>,
    ) {
        Self::for_each_16b(|offset, x, y| {
            if y >= start.y && y < end.y {
                let tiled = tiled + (offset as usize);
                let linear = linear.at(Offset4D::new(x, y, 0, 0));
                if x >= start.x && x + 16 <= end.x {
                    C::copy_16b(tiled as *mut _, linear as *mut _);
                } else if x + 16 >= start.x && x < end.x {
                    let start = (std::cmp::max(x, start.x) - x) as usize;
                    let end = std::cmp::min(end.x - x, 16) as usize;
                    C::copy(
                        (tiled + start) as *mut _,
                        (linear + start) as *mut _,
                        end - start,
                    );
                }
            }
        });
    }

    unsafe fn copy_whole_gob(tiled: usize, linear: LinearPointer) {
        Self::for_each_16b(|offset, x, y| {
            let tiled = tiled + (offset as usize);
            let linear = linear.at(Offset4D::new(x, y, 0, 0));
            C::copy_16b(tiled as *mut _, linear as *mut _);
        });
    }
}

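/// Expands `start..end` outwards so that both endpoints are aligned to
/// `align`, which must be a power of two.
///
/// For example, `aligned_range(5, 20, 8)` returns `0..24`.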
fn aligned_range(start: u32, end: u32, align: u32) -> Range<u32> {
    debug_assert!(align.is_power_of_two());
    let align_1 = align - 1;
    (start & !align_1)..((end + align_1) & !align_1)
}

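/// Intersects `whole` with the chunk `chunk_start..(chunk_start + chunk_len)`
/// and returns the result relative to `chunk_start`.
///
/// For example, `chunk_range(5..20, 16, 8)` returns `0..4`.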
fn chunk_range(
    whole: Range<u32>,
    chunk_start: u32,
    chunk_len: u32,
) -> Range<u32> {
    debug_assert!(chunk_start < whole.end);
    let start = if chunk_start < whole.start {
        whole.start - chunk_start
    } else {
        0
    };
    let end = std::cmp::min(whole.end - chunk_start, chunk_len);
    start..end
}

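/// Walks the `chunk`-aligned grid covering `start..end` and calls
/// `f(chunk_start, start, end)` for each chunk, where `chunk_start` is the
/// chunk's aligned origin and `start..end` is the copied region within that
/// chunk, relative to `chunk_start`.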
fn for_each_extent4d<U>(
    start: Offset4D<U>,
    end: Offset4D<U>,
    chunk: Extent4D<U>,
    mut f: impl FnMut(Offset4D<U>, Offset4D<U>, Offset4D<U>),
) {
    debug_assert!(chunk.width.is_power_of_two());
    debug_assert!(chunk.height.is_power_of_two());
    debug_assert!(chunk.depth.is_power_of_two());
    debug_assert!(chunk.array_len == 1);

    debug_assert!(start.a == 0);
    debug_assert!(end.a == 1);

    let x_range = aligned_range(start.x, end.x, chunk.width);
    let y_range = aligned_range(start.y, end.y, chunk.height);
    let z_range = aligned_range(start.z, end.z, chunk.depth);

    for z in z_range.step_by(chunk.depth as usize) {
        let chunk_z = chunk_range(start.z..end.z, z, chunk.depth);
        for y in y_range.clone().step_by(chunk.height as usize) {
            let chunk_y = chunk_range(start.y..end.y, y, chunk.height);
            for x in x_range.clone().step_by(chunk.width as usize) {
                let chunk_x = chunk_range(start.x..end.x, x, chunk.width);
                let chunk_start = Offset4D::new(x, y, z, start.a);
                let start = Offset4D::new(
                    chunk_x.start,
                    chunk_y.start,
                    chunk_z.start,
                    start.a,
                );
                let end =
                    Offset4D::new(chunk_x.end, chunk_y.end, chunk_z.end, end.a);
                f(chunk_start, start, end);
            }
        }
    }
}

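/// Like [`for_each_extent4d`], but `start..end` must already be aligned to
/// `chunk`, so every chunk is whole and `f` only receives each chunk's origin.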
fn for_each_extent4d_aligned<U>(
    start: Offset4D<U>,
    end: Offset4D<U>,
    chunk: Extent4D<U>,
    mut f: impl FnMut(Offset4D<U>),
) {
    debug_assert!(start.x % chunk.width == 0);
    debug_assert!(start.y % chunk.height == 0);
    debug_assert!(start.z % chunk.depth == 0);
    debug_assert!(start.a == 0);

    debug_assert!(end.x % chunk.width == 0);
    debug_assert!(end.y % chunk.height == 0);
    debug_assert!(end.z % chunk.depth == 0);
    debug_assert!(end.a == 1);

    debug_assert!(chunk.width.is_power_of_two());
    debug_assert!(chunk.height.is_power_of_two());
    debug_assert!(chunk.depth.is_power_of_two());
    debug_assert!(chunk.array_len == 1);

    for z in (start.z..end.z).step_by(chunk.depth as usize) {
        for y in (start.y..end.y).step_by(chunk.height as usize) {
            for x in (start.x..end.x).step_by(chunk.width as usize) {
                f(Offset4D::new(x, y, z, start.a));
            }
        }
    }
}

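/// Helper for computing byte addresses in a block-linear arrangement: an
/// `extent`-sized region made of contiguous `bl_extent`-sized blocks, with
/// the blocks laid out in x, then y, then z order.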
struct BlockPointer {
    pointer: usize,
    x_mul: usize,
    y_mul: usize,
    z_mul: usize,
    #[cfg(debug_assertions)]
    bl_extent: Extent4D<units::Bytes>,
}

impl BlockPointer {
    fn new(
        pointer: usize,
        bl_extent: Extent4D<units::Bytes>,
        extent: Extent4D<units::Bytes>,
    ) -> BlockPointer {
        debug_assert!(bl_extent.array_len == 1);

        debug_assert!(extent.width % bl_extent.width == 0);
        debug_assert!(extent.height % bl_extent.height == 0);
        debug_assert!(extent.depth % bl_extent.depth == 0);
        debug_assert!(extent.array_len == 1);

        BlockPointer {
            pointer,
            // We assume that offsets passed to at() are aligned to bl_extent so
            //
            // x_bl * bl_size_B
            // = (x / bl_extent.width) * bl_size_B
            // = x * (bl_size_B / bl_extent.width)
            // = x * bl_extent.height * bl_extent.depth
            x_mul: (bl_extent.height as usize) * (bl_extent.depth as usize),

            // y_bl * width_bl * bl_size_B
            // = (y / bl_extent.height) * width_bl * bl_size_B
            // = y * (bl_size_B / bl_extent.height) * width_bl
            // = y * bl_extent.width * bl_extent.depth * width_bl
            // = y * (width_bl * bl_extent.width) * bl_extent.depth
            // = y * extent.width * bl_extent.depth
            y_mul: (extent.width as usize) * (bl_extent.depth as usize),

            // z_bl * width_bl * height_bl * bl_size_B
            // = (z / bl_extent.depth) * width_bl * height_bl * bl_size_B
            // = z * (bl_size_B / bl_extent.depth) * width_bl * height_bl
            // = z * (bl_extent.width * bl_extent.height) * width_bl * height_bl
            // = z * width_bl * bl_extent.width * height_bl * bl_extent.height
            // = z * extent.width * extent.height
            z_mul: (extent.width as usize) * (extent.height as usize),

            #[cfg(debug_assertions)]
            bl_extent,
        }
    }

    #[inline]
    fn at(&self, offset: Offset4D<units::Bytes>) -> usize {
        #[cfg(debug_assertions)]
        {
            debug_assert!(offset.x % self.bl_extent.width == 0);
            debug_assert!(offset.y % self.bl_extent.height == 0);
            debug_assert!(offset.z % self.bl_extent.depth == 0);
            debug_assert!(offset.a == 0);
        }

        self.pointer
            + (offset.z as usize) * self.z_mul
            + (offset.y as usize) * self.y_mul
            + (offset.x as usize) * self.x_mul
    }
}

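/// Helper for computing byte addresses on the linear side of a copy.
///
/// The x component of an offset is divided by `x_divisor` before being
/// applied; the plain byte copies in this file always use a divisor of 1.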
#[derive(Copy, Clone)]
struct LinearPointer {
    pointer: usize,
    x_shift: u32,
    row_stride_B: usize,
    plane_stride_B: usize,
}

impl LinearPointer {
    fn new(
        pointer: usize,
        x_divisor: u32,
        row_stride_B: usize,
        plane_stride_B: usize,
    ) -> LinearPointer {
        debug_assert!(x_divisor.is_power_of_two());
        LinearPointer {
            pointer,
            x_shift: x_divisor.ilog2(),
            row_stride_B,
            plane_stride_B,
        }
    }

    fn x_divisor(&self) -> u32 {
        1 << self.x_shift
    }

    #[inline]
    fn reverse(self, offset: Offset4D<units::Bytes>) -> LinearPointer {
        debug_assert!(offset.x % (1 << self.x_shift) == 0);
        debug_assert!(offset.a == 0);
        LinearPointer {
            pointer: self
                .pointer
                .wrapping_sub((offset.z as usize) * self.plane_stride_B)
                .wrapping_sub((offset.y as usize) * self.row_stride_B)
                .wrapping_sub((offset.x >> self.x_shift) as usize),
            x_shift: self.x_shift,
            row_stride_B: self.row_stride_B,
            plane_stride_B: self.plane_stride_B,
        }
    }

    #[inline]
    fn at(self, offset: Offset4D<units::Bytes>) -> usize {
        debug_assert!(offset.x % (1 << self.x_shift) == 0);
        debug_assert!(offset.a == 0);
        self.pointer
            .wrapping_add((offset.z as usize) * self.plane_stride_B)
            .wrapping_add((offset.y as usize) * self.row_stride_B)
            .wrapping_add((offset.x >> self.x_shift) as usize)
    }

    #[inline]
    fn offset(self, offset: Offset4D<units::Bytes>) -> LinearPointer {
        LinearPointer {
            pointer: self.at(offset),
            x_shift: self.x_shift,
            row_stride_B: self.row_stride_B,
            plane_stride_B: self.plane_stride_B,
        }
    }
}

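/// Copies the `start..end` region of a single tile (block) starting at
/// `tile_ptr`, GOB by GOB. Offsets are relative to the tile's origin.
///
/// Whole, aligned GOBs take the `copy_whole_gob` fast path; partial GOBs at
/// the edges go through the bounds-checked `copy_gob` path.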
unsafe fn copy_tile<CG: CopyGOB>(
    tiling: Tiling,
    tile_ptr: usize,
    linear: LinearPointer,
    start: Offset4D<units::Bytes>,
    end: Offset4D<units::Bytes>,
) {
    debug_assert!(linear.x_divisor() == CG::X_DIVISOR);
    debug_assert!(tiling.gob_type.extent_B() == CG::GOB_EXTENT_B);

    let tile_extent_B = tiling.extent_B();
    let tile_ptr = BlockPointer::new(tile_ptr, CG::GOB_EXTENT_B, tile_extent_B);

    if start.is_aligned_to(CG::GOB_EXTENT_B)
        && end.is_aligned_to(CG::GOB_EXTENT_B)
    {
        for_each_extent4d_aligned(start, end, CG::GOB_EXTENT_B, |gob| {
            CG::copy_whole_gob(tile_ptr.at(gob), linear.offset(gob));
        });
    } else {
        for_each_extent4d(start, end, CG::GOB_EXTENT_B, |gob, start, end| {
            let tiled = tile_ptr.at(gob);
            let linear = linear.offset(gob);
            if start == Offset4D::new(0, 0, 0, 0)
                && end == Offset4D::new(0, 0, 0, 0) + CG::GOB_EXTENT_B
            {
                CG::copy_whole_gob(tiled, linear);
            } else {
                CG::copy_gob(tiled, linear, start, end);
            }
        });
    }
}

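/// Copies the `start..end` region of a miplevel between the tiled layout at
/// `level_tiled_ptr` and the linear layout described by `linear`, tile by
/// tile. `CG` selects both the GOB layout and the copy direction.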
unsafe fn copy_tiled<CG: CopyGOB>(
    tiling: Tiling,
    level_extent_B: Extent4D<units::Bytes>,
    level_tiled_ptr: usize,
    linear: LinearPointer,
    start: Offset4D<units::Bytes>,
    end: Offset4D<units::Bytes>,
) {
    let tile_extent_B = tiling.extent_B();
    let level_extent_B = level_extent_B.align(&tile_extent_B);

    // Back up the linear pointer so it also points at the start of the level.
    // This way, every step of the iteration can assume that both pointers
    // point to the start chunk of the level, tile, or GOB.
    let linear = linear.reverse(start);

    let level_tiled_ptr =
        BlockPointer::new(level_tiled_ptr, tile_extent_B, level_extent_B);

    for_each_extent4d(start, end, tile_extent_B, |tile, start, end| {
        let tile_ptr = level_tiled_ptr.at(tile);
        let linear = linear.offset(tile);
        copy_tile::<CG>(tiling, tile_ptr, linear, start, end);
    });
}

struct RawCopyToTiled {}

impl Copy16B for RawCopyToTiled {
    const X_DIVISOR: u32 = 1;

    unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize) {
        // This is backwards from memcpy
        std::ptr::copy_nonoverlapping(linear, tiled, bytes);
    }
}

struct RawCopyToLinear {}

impl Copy16B for RawCopyToLinear {
    const X_DIVISOR: u32 = 1;

    unsafe fn copy(tiled: *mut u8, linear: *mut u8, bytes: usize) {
        // This is backwards from memcpy
        std::ptr::copy_nonoverlapping(tiled, linear, bytes);
    }
}

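/// Copies a region of a linear image into a block-linear (tiled) image.
///
/// `offset_B` and `extent_B` describe the copied region within the tiled
/// miplevel (whose full extent is `level_extent_B`), all in bytes.
/// `linear_src` points at the first byte of the copied region in the linear
/// image, which uses the given row and plane strides.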
#[no_mangle]
pub unsafe extern "C" fn nil_copy_linear_to_tiled(
    tiled_dst: *mut c_void,
    level_extent_B: Extent4D<units::Bytes>,
    linear_src: *const c_void,
    linear_row_stride_B: usize,
    linear_plane_stride_B: usize,
    offset_B: Offset4D<units::Bytes>,
    extent_B: Extent4D<units::Bytes>,
    tiling: &Tiling,
) {
    let end_B = offset_B + extent_B;

    let linear_src = linear_src as usize;
    let tiled_dst = tiled_dst as usize;
    let linear_pointer = LinearPointer::new(
        linear_src,
        1,
        linear_row_stride_B,
        linear_plane_stride_B,
    );

    copy_tiled::<CopyGOBTuring2D<RawCopyToTiled>>(
        *tiling,
        level_extent_B,
        tiled_dst,
        linear_pointer,
        offset_B,
        end_B,
    );
}

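/// Copies a region of a block-linear (tiled) image out to a linear image.
///
/// The parameters mirror [`nil_copy_linear_to_tiled`], with `tiled_src` as
/// the source miplevel and `linear_dst` as the destination.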
#[no_mangle]
pub unsafe extern "C" fn nil_copy_tiled_to_linear(
    linear_dst: *mut c_void,
    linear_row_stride_B: usize,
    linear_plane_stride_B: usize,
    tiled_src: *const c_void,
    level_extent_B: Extent4D<units::Bytes>,
    offset_B: Offset4D<units::Bytes>,
    extent_B: Extent4D<units::Bytes>,
    tiling: &Tiling,
) {
    let mut end_B = offset_B + extent_B;
    end_B.a = 1;
    let linear_dst = linear_dst as usize;
    let tiled_src = tiled_src as usize;
    let linear_pointer = LinearPointer::new(
        linear_dst,
        1,
        linear_row_stride_B,
        linear_plane_stride_B,
    );

    copy_tiled::<CopyGOBTuring2D<RawCopyToLinear>>(
        *tiling,
        level_extent_B,
        tiled_src,
        linear_pointer,
        offset_B,
        end_B,
    );
}