1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23 /* blt command encoding for gen4/5 */
24 #include "crocus_context.h"
25
26 #include "crocus_genx_macros.h"
27 #include "crocus_genx_protos.h"
28 #include "crocus_resource.h"
29
30 #define FILE_DEBUG_FLAG DEBUG_BLIT
31
32 #if GFX_VER <= 5
33
34 static uint32_t
color_depth_for_cpp(int cpp)35 color_depth_for_cpp(int cpp)
36 {
37 switch (cpp) {
38 case 4: return COLOR_DEPTH__32bit;
39 case 2: return COLOR_DEPTH__565;
40 case 1: return COLOR_DEPTH__8bit;
41 default:
42 unreachable("not reached");
43 }
44 }
45
46 static void
blt_set_alpha_to_one(struct crocus_batch * batch,struct crocus_resource * dst,int x,int y,int width,int height)47 blt_set_alpha_to_one(struct crocus_batch *batch,
48 struct crocus_resource *dst,
49 int x, int y, int width, int height)
50 {
51 const struct isl_format_layout *fmtl = isl_format_get_layout(dst->surf.format);
52 unsigned cpp = fmtl->bpb / 8;
53 uint32_t pitch = dst->surf.row_pitch_B;
54
55 if (dst->surf.tiling != ISL_TILING_LINEAR)
56 pitch /= 4;
57 /* We need to split the blit into chunks that each fit within the blitter's
58 * restrictions. We can't use a chunk size of 32768 because we need to
59 * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
60 * a nice round power of two, big enough that performance won't suffer, and
61 * small enough to guarantee everything fits.
62 */
63 const uint32_t max_chunk_size = 16384;
64
65 for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
66 for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
67 const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
68 const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
69 uint32_t tile_x, tile_y;
70 uint64_t offset_B;
71 ASSERTED uint32_t z_offset_el, array_offset;
72 isl_tiling_get_intratile_offset_el(dst->surf.tiling, dst->surf.dim,
73 dst->surf.msaa_layout,
74 cpp * 8, dst->surf.samples,
75 dst->surf.row_pitch_B,
76 dst->surf.array_pitch_el_rows,
77 chunk_x, chunk_y, 0, 0,
78 &offset_B,
79 &tile_x, &tile_y,
80 &z_offset_el, &array_offset);
81 assert(z_offset_el == 0);
82 assert(array_offset == 0);
83 crocus_emit_cmd(batch, GENX(XY_COLOR_BLT), xyblt) {
84 xyblt.TilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
85 xyblt.ColorDepth = color_depth_for_cpp(cpp);
86 xyblt.RasterOperation = 0xF0;
87 xyblt.DestinationPitch = pitch;
88 xyblt._32bppByteMask = 2;
89 xyblt.DestinationBaseAddress = rw_bo(dst->bo, offset_B);
90 xyblt.DestinationX1Coordinate = tile_x;
91 xyblt.DestinationY1Coordinate = tile_y;
92 xyblt.DestinationX2Coordinate = tile_x + chunk_w;
93 xyblt.DestinationY2Coordinate = tile_y + chunk_h;
94 xyblt.SolidPatternColor = 0xffffffff;
95 }
96 }
97 }
98 }
99
validate_blit_for_blt(struct crocus_batch * batch,const struct pipe_blit_info * info)100 static bool validate_blit_for_blt(struct crocus_batch *batch,
101 const struct pipe_blit_info *info)
102 {
103 /* If the source and destination are the same size with no mirroring,
104 * the rectangles are within the size of the texture and there is no
105 * scissor, then we can probably use the blit engine.
106 */
107 if (info->dst.box.width != info->src.box.width ||
108 info->dst.box.height != info->src.box.height)
109 return false;
110
111 if (info->scissor_enable)
112 return false;
113
114 if (info->dst.box.height < 0 || info->src.box.height < 0)
115 return false;
116
117 if (info->dst.box.depth > 1 || info->src.box.depth > 1)
118 return false;
119
120 const struct util_format_description *desc =
121 util_format_description(info->src.format);
122 int i = util_format_get_first_non_void_channel(info->src.format);
123 if (i == -1)
124 return false;
125
126 /* can't do the alpha to 1 setting for these. */
127 if ((util_format_has_alpha1(info->src.format) &&
128 util_format_has_alpha(info->dst.format) &&
129 desc->channel[i].size > 8))
130 return false;
131 return true;
132 }
133
crocus_resource_blt_pitch(struct crocus_resource * res)134 static inline int crocus_resource_blt_pitch(struct crocus_resource *res)
135 {
136 int pitch = res->surf.row_pitch_B;
137 if (res->surf.tiling != ISL_TILING_LINEAR)
138 pitch /= 4;
139 return pitch;
140 }
141
142
emit_copy_blt(struct crocus_batch * batch,struct crocus_resource * src,struct crocus_resource * dst,unsigned cpp,int32_t src_pitch,unsigned src_offset,int32_t dst_pitch,unsigned dst_offset,uint16_t src_x,uint16_t src_y,uint16_t dst_x,uint16_t dst_y,uint16_t w,uint16_t h)143 static bool emit_copy_blt(struct crocus_batch *batch,
144 struct crocus_resource *src,
145 struct crocus_resource *dst,
146 unsigned cpp,
147 int32_t src_pitch,
148 unsigned src_offset,
149 int32_t dst_pitch,
150 unsigned dst_offset,
151 uint16_t src_x, uint16_t src_y,
152 uint16_t dst_x, uint16_t dst_y,
153 uint16_t w, uint16_t h)
154
155 {
156 uint32_t src_tile_w, src_tile_h;
157 uint32_t dst_tile_w, dst_tile_h;
158 int dst_y2 = dst_y + h;
159 int dst_x2 = dst_x + w;
160
161 DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
162 __func__,
163 src, src_pitch, src_offset, src_x, src_y,
164 dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);
165
166 isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);
167 isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);
168
169 /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
170 * (X direction width of the Tile). This is ensured while allocating the
171 * buffer object.
172 */
173 assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
174 assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
175
176 /* For big formats (such as floating point), do the copy using 16 or
177 * 32bpp and multiply the coordinates.
178 */
179 if (cpp > 4) {
180 if (cpp % 4 == 2) {
181 dst_x *= cpp / 2;
182 dst_x2 *= cpp / 2;
183 src_x *= cpp / 2;
184 cpp = 2;
185 } else {
186 assert(cpp % 4 == 0);
187 dst_x *= cpp / 4;
188 dst_x2 *= cpp / 4;
189 src_x *= cpp / 4;
190 cpp = 4;
191 }
192 }
193
194 /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop
195 * the low bits. Offsets must be naturally aligned.
196 */
197 if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
198 dst_pitch % 4 != 0 || dst_offset % cpp != 0)
199 return false;
200
201 /* For tiled source and destination, pitch value should be specified
202 * as a number of Dwords.
203 */
204 if (dst->surf.tiling != ISL_TILING_LINEAR)
205 dst_pitch /= 4;
206
207 if (src->surf.tiling != ISL_TILING_LINEAR)
208 src_pitch /= 4;
209
210 assert(cpp <= 4);
211 crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {
212 xyblt.RasterOperation = 0xCC;
213 xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
214 xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;
215 xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);
216 xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);
217 xyblt.ColorDepth = color_depth_for_cpp(cpp);
218 xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;
219 xyblt.DestinationX1Coordinate = dst_x;
220 xyblt.DestinationY1Coordinate = dst_y;
221 xyblt.DestinationX2Coordinate = dst_x2;
222 xyblt.DestinationY2Coordinate = dst_y2;
223 xyblt.DestinationPitch = dst_pitch;
224 xyblt.SourceX1Coordinate = src_x;
225 xyblt.SourceY1Coordinate = src_y;
226 xyblt.SourcePitch = src_pitch;
227 };
228
229 crocus_emit_mi_flush(batch);
230 return true;
231 }
232
crocus_emit_blt(struct crocus_batch * batch,struct crocus_resource * src,struct crocus_resource * dst,unsigned dst_level,unsigned dst_x,unsigned dst_y,unsigned dst_z,unsigned src_level,const struct pipe_box * src_box)233 static bool crocus_emit_blt(struct crocus_batch *batch,
234 struct crocus_resource *src,
235 struct crocus_resource *dst,
236 unsigned dst_level,
237 unsigned dst_x, unsigned dst_y,
238 unsigned dst_z,
239 unsigned src_level,
240 const struct pipe_box *src_box)
241 {
242 const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);
243 unsigned src_cpp = src_fmtl->bpb / 8;
244 const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);
245 const unsigned dst_cpp = dst_fmtl->bpb / 8;
246 uint16_t src_x, src_y;
247 uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
248 uint32_t src_width = src_box->width, src_height = src_box->height;
249
250 /* gen4/5 can't handle Y tiled blits. */
251 if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)
252 return false;
253
254 if (src->surf.format != dst->surf.format)
255 return false;
256
257 if (src_cpp != dst_cpp)
258 return false;
259
260 src_x = src_box->x;
261 src_y = src_box->y;
262
263 assert(src_cpp == dst_cpp);
264
265 crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,
266 &src_image_y);
267 if (util_format_is_compressed(src->base.b.format)) {
268 int bw = util_format_get_blockwidth(src->base.b.format);
269 int bh = util_format_get_blockheight(src->base.b.format);
270 assert(src_x % bw == 0);
271 assert(src_y % bh == 0);
272 src_x /= (int)bw;
273 src_y /= (int)bh;
274 src_width = DIV_ROUND_UP(src_width, (int)bw);
275 src_height = DIV_ROUND_UP(src_height, (int)bh);
276 }
277
278 crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,
279 &dst_image_y);
280 if (util_format_is_compressed(dst->base.b.format)) {
281 int bw = util_format_get_blockwidth(dst->base.b.format);
282 int bh = util_format_get_blockheight(dst->base.b.format);
283 assert(dst_x % bw == 0);
284 assert(dst_y % bh == 0);
285 dst_x /= (int)bw;
286 dst_y /= (int)bh;
287 }
288 src_x += src_image_x;
289 src_y += src_image_y;
290 dst_x += dst_image_x;
291 dst_y += dst_image_y;
292
293 /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
294 * Data Size Limitations):
295 *
296 * The BLT engine is capable of transferring very large quantities of
297 * graphics data. Any graphics data read from and written to the
298 * destination is permitted to represent a number of pixels that
299 * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
300 * at the destination. The maximum number of pixels that may be
301 * represented per scan line’s worth of graphics data depends on the
302 * color depth.
303 *
304 * The blitter's pitch is a signed 16-bit integer, but measured in bytes
305 * for linear surfaces and DWords for tiled surfaces. So the maximum
306 * pitch is 32k linear and 128k tiled.
307 */
308 if (crocus_resource_blt_pitch(src) >= 32768 ||
309 crocus_resource_blt_pitch(dst) >= 32768) {
310 return false;
311 }
312
313 /* We need to split the blit into chunks that each fit within the blitter's
314 * restrictions. We can't use a chunk size of 32768 because we need to
315 * ensure that src_tile_x + chunk_size fits. We choose 16384 because it's
316 * a nice round power of two, big enough that performance won't suffer, and
317 * small enough to guarantee everything fits.
318 */
319 const uint32_t max_chunk_size = 16384;
320
321 for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {
322 for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {
323 const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);
324 const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);
325
326 uint64_t src_offset;
327 uint32_t src_tile_x, src_tile_y;
328 ASSERTED uint32_t z_offset_el, array_offset;
329 isl_tiling_get_intratile_offset_el(src->surf.tiling, src->surf.dim,
330 src->surf.msaa_layout,
331 src_cpp * 8, src->surf.samples,
332 src->surf.row_pitch_B,
333 src->surf.array_pitch_el_rows,
334 src_x + chunk_x, src_y + chunk_y, 0, 0,
335 &src_offset,
336 &src_tile_x, &src_tile_y,
337 &z_offset_el, &array_offset);
338 assert(z_offset_el == 0);
339 assert(array_offset == 0);
340
341 uint64_t dst_offset;
342 uint32_t dst_tile_x, dst_tile_y;
343 isl_tiling_get_intratile_offset_el(dst->surf.tiling, dst->surf.dim,
344 dst->surf.msaa_layout,
345 dst_cpp * 8, dst->surf.samples,
346 dst->surf.row_pitch_B,
347 dst->surf.array_pitch_el_rows,
348 dst_x + chunk_x, dst_y + chunk_y, 0, 0,
349 &dst_offset,
350 &dst_tile_x, &dst_tile_y,
351 &z_offset_el, &array_offset);
352 assert(z_offset_el == 0);
353 assert(array_offset == 0);
354 if (!emit_copy_blt(batch, src, dst,
355 src_cpp, src->surf.row_pitch_B,
356 src_offset,
357 dst->surf.row_pitch_B, dst_offset,
358 src_tile_x, src_tile_y,
359 dst_tile_x, dst_tile_y,
360 chunk_w, chunk_h)) {
361 return false;
362 }
363 }
364 }
365
366 if (util_format_has_alpha1(src->base.b.format) &&
367 util_format_has_alpha(dst->base.b.format))
368 blt_set_alpha_to_one(batch, dst, 0, 0, src_width, src_height);
369 return true;
370 }
371
crocus_blit_blt(struct crocus_batch * batch,const struct pipe_blit_info * info)372 static bool crocus_blit_blt(struct crocus_batch *batch,
373 const struct pipe_blit_info *info)
374 {
375 if (!validate_blit_for_blt(batch, info))
376 return false;
377
378 return crocus_emit_blt(batch,
379 (struct crocus_resource *)info->src.resource,
380 (struct crocus_resource *)info->dst.resource,
381 info->dst.level,
382 info->dst.box.x,
383 info->dst.box.y,
384 info->dst.box.z,
385 info->src.level,
386 &info->src.box);
387 }
388
389
crocus_copy_region_blt(struct crocus_batch * batch,struct crocus_resource * dst,unsigned dst_level,unsigned dstx,unsigned dsty,unsigned dstz,struct crocus_resource * src,unsigned src_level,const struct pipe_box * src_box)390 static bool crocus_copy_region_blt(struct crocus_batch *batch,
391 struct crocus_resource *dst,
392 unsigned dst_level,
393 unsigned dstx, unsigned dsty, unsigned dstz,
394 struct crocus_resource *src,
395 unsigned src_level,
396 const struct pipe_box *src_box)
397 {
398 if (dst->base.b.target == PIPE_BUFFER || src->base.b.target == PIPE_BUFFER)
399 return false;
400 return crocus_emit_blt(batch,
401 src,
402 dst,
403 dst_level,
404 dstx, dsty, dstz,
405 src_level,
406 src_box);
407 }
408 #endif
409
410 void
genX(crocus_init_blt)411 genX(crocus_init_blt)(struct crocus_screen *screen)
412 {
413 #if GFX_VER <= 5
414 screen->vtbl.blit_blt = crocus_blit_blt;
415 screen->vtbl.copy_region_blt = crocus_copy_region_blt;
416 #else
417 screen->vtbl.blit_blt = NULL;
418 screen->vtbl.copy_region_blt = NULL;
419 #endif
420 }
421