• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2018 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20  * DEALINGS IN THE SOFTWARE.
21  */
22 
23 /* blt command encoding for gen4/5 */
24 #include "crocus_context.h"
25 
26 #include "crocus_genx_macros.h"
27 #include "crocus_genx_protos.h"
28 #include "crocus_resource.h"
29 
30 #define FILE_DEBUG_FLAG DEBUG_BLIT
31 
32 #if GFX_VER <= 5
33 
34 static uint32_t
color_depth_for_cpp(int cpp)35 color_depth_for_cpp(int cpp)
36 {
37    switch (cpp) {
38    case 4: return COLOR_DEPTH__32bit;
39    case 2: return COLOR_DEPTH__565;
40    case 1: return COLOR_DEPTH__8bit;
41    default:
42       unreachable("not reached");
43    }
44 }
45 
46 static void
blt_set_alpha_to_one(struct crocus_batch * batch,struct crocus_resource * dst,int x,int y,int width,int height)47 blt_set_alpha_to_one(struct crocus_batch *batch,
48 		     struct crocus_resource *dst,
49 		     int x, int y, int width, int height)
50 {
51    const struct isl_format_layout *fmtl = isl_format_get_layout(dst->surf.format);
52    unsigned cpp = fmtl->bpb / 8;
53    uint32_t pitch = dst->surf.row_pitch_B;
54 
55    if (dst->surf.tiling != ISL_TILING_LINEAR)
56       pitch /= 4;
57    /* We need to split the blit into chunks that each fit within the blitter's
58     * restrictions.  We can't use a chunk size of 32768 because we need to
59     * ensure that src_tile_x + chunk_size fits.  We choose 16384 because it's
60     * a nice round power of two, big enough that performance won't suffer, and
61     * small enough to guarantee everything fits.
62     */
63    const uint32_t max_chunk_size = 16384;
64 
65    for (uint32_t chunk_x = 0; chunk_x < width; chunk_x += max_chunk_size) {
66       for (uint32_t chunk_y = 0; chunk_y < height; chunk_y += max_chunk_size) {
67          const uint32_t chunk_w = MIN2(max_chunk_size, width - chunk_x);
68          const uint32_t chunk_h = MIN2(max_chunk_size, height - chunk_y);
69          uint32_t tile_x, tile_y;
70          uint64_t offset_B;
71          ASSERTED uint32_t z_offset_el, array_offset;
72          isl_tiling_get_intratile_offset_el(dst->surf.tiling, dst->surf.dim,
73                                             dst->surf.msaa_layout,
74                                             cpp * 8, dst->surf.samples,
75                                             dst->surf.row_pitch_B,
76                                             dst->surf.array_pitch_el_rows,
77                                             chunk_x, chunk_y, 0, 0,
78                                             &offset_B,
79                                             &tile_x, &tile_y,
80                                             &z_offset_el, &array_offset);
81          assert(z_offset_el == 0);
82          assert(array_offset == 0);
83 	 crocus_emit_cmd(batch, GENX(XY_COLOR_BLT), xyblt) {
84             xyblt.TilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
85             xyblt.ColorDepth = color_depth_for_cpp(cpp);
86             xyblt.RasterOperation = 0xF0;
87             xyblt.DestinationPitch = pitch;
88             xyblt._32bppByteMask = 2;
89             xyblt.DestinationBaseAddress = rw_bo(dst->bo, offset_B);
90             xyblt.DestinationX1Coordinate = tile_x;
91             xyblt.DestinationY1Coordinate = tile_y;
92             xyblt.DestinationX2Coordinate = tile_x + chunk_w;
93             xyblt.DestinationY2Coordinate = tile_y + chunk_h;
94             xyblt.SolidPatternColor = 0xffffffff;
95 	 }
96       }
97    }
98 }
99 
validate_blit_for_blt(struct crocus_batch * batch,const struct pipe_blit_info * info)100 static bool validate_blit_for_blt(struct crocus_batch *batch,
101                                   const struct pipe_blit_info *info)
102 {
103    /* If the source and destination are the same size with no mirroring,
104     * the rectangles are within the size of the texture and there is no
105     * scissor, then we can probably use the blit engine.
106     */
107    if (info->dst.box.width != info->src.box.width ||
108        info->dst.box.height != info->src.box.height)
109       return false;
110 
111    if (info->scissor_enable)
112       return false;
113 
114    if (info->dst.box.height < 0 || info->src.box.height < 0)
115       return false;
116 
117    if (info->dst.box.depth > 1 || info->src.box.depth > 1)
118       return false;
119 
120    const struct util_format_description *desc =
121       util_format_description(info->src.format);
122    int i = util_format_get_first_non_void_channel(info->src.format);
123    if (i == -1)
124       return false;
125 
126    /* can't do the alpha to 1 setting for these. */
127    if ((util_format_has_alpha1(info->src.format) &&
128         util_format_has_alpha(info->dst.format) &&
129         desc->channel[i].size > 8))
130       return false;
131    return true;
132 }
133 
crocus_resource_blt_pitch(struct crocus_resource * res)134 static inline int crocus_resource_blt_pitch(struct crocus_resource *res)
135 {
136    int pitch = res->surf.row_pitch_B;
137    if (res->surf.tiling != ISL_TILING_LINEAR)
138       pitch /= 4;
139    return pitch;
140 }
141 
142 
emit_copy_blt(struct crocus_batch * batch,struct crocus_resource * src,struct crocus_resource * dst,unsigned cpp,int32_t src_pitch,unsigned src_offset,int32_t dst_pitch,unsigned dst_offset,uint16_t src_x,uint16_t src_y,uint16_t dst_x,uint16_t dst_y,uint16_t w,uint16_t h)143 static bool emit_copy_blt(struct crocus_batch *batch,
144                           struct crocus_resource *src,
145                           struct crocus_resource *dst,
146                           unsigned cpp,
147                           int32_t src_pitch,
148                           unsigned src_offset,
149                           int32_t dst_pitch,
150                           unsigned dst_offset,
151                           uint16_t src_x, uint16_t src_y,
152                           uint16_t dst_x, uint16_t dst_y,
153                           uint16_t w, uint16_t h)
154 
155 {
156    uint32_t src_tile_w, src_tile_h;
157    uint32_t dst_tile_w, dst_tile_h;
158    int dst_y2 = dst_y + h;
159    int dst_x2 = dst_x + w;
160 
161    DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
162        __func__,
163        src, src_pitch, src_offset, src_x, src_y,
164        dst, dst_pitch, dst_offset, dst_x, dst_y, w, h);
165 
166    isl_get_tile_dims(src->surf.tiling, cpp, &src_tile_w, &src_tile_h);
167    isl_get_tile_dims(dst->surf.tiling, cpp, &dst_tile_w, &dst_tile_h);
168 
169    /* For Tiled surfaces, the pitch has to be a multiple of the Tile width
170     * (X direction width of the Tile). This is ensured while allocating the
171     * buffer object.
172     */
173    assert(src->surf.tiling == ISL_TILING_LINEAR || (src_pitch % src_tile_w) == 0);
174    assert(dst->surf.tiling == ISL_TILING_LINEAR || (dst_pitch % dst_tile_w) == 0);
175 
176    /* For big formats (such as floating point), do the copy using 16 or
177     * 32bpp and multiply the coordinates.
178     */
179    if (cpp > 4) {
180       if (cpp % 4 == 2) {
181          dst_x *= cpp / 2;
182          dst_x2 *= cpp / 2;
183          src_x *= cpp / 2;
184          cpp = 2;
185       } else {
186          assert(cpp % 4 == 0);
187          dst_x *= cpp / 4;
188          dst_x2 *= cpp / 4;
189          src_x *= cpp / 4;
190          cpp = 4;
191       }
192    }
193 
194    /* Blit pitch must be dword-aligned.  Otherwise, the hardware appears to drop
195     * the low bits.  Offsets must be naturally aligned.
196     */
197    if (src_pitch % 4 != 0 || src_offset % cpp != 0 ||
198        dst_pitch % 4 != 0 || dst_offset % cpp != 0)
199      return false;
200 
201    /* For tiled source and destination, pitch value should be specified
202     * as a number of Dwords.
203     */
204    if (dst->surf.tiling != ISL_TILING_LINEAR)
205       dst_pitch /= 4;
206 
207    if (src->surf.tiling != ISL_TILING_LINEAR)
208       src_pitch /= 4;
209 
210    assert(cpp <= 4);
211    crocus_emit_cmd(batch, GENX(XY_SRC_COPY_BLT), xyblt) {
212       xyblt.RasterOperation = 0xCC;
213       xyblt.DestinationTilingEnable = dst->surf.tiling != ISL_TILING_LINEAR;
214       xyblt.SourceTilingEnable = src->surf.tiling != ISL_TILING_LINEAR;
215       xyblt.SourceBaseAddress = ro_bo(src->bo, src_offset);
216       xyblt.DestinationBaseAddress = rw_bo(dst->bo, dst_offset);
217       xyblt.ColorDepth = color_depth_for_cpp(cpp);
218       xyblt._32bppByteMask = cpp == 4 ? 0x3 : 0x1;
219       xyblt.DestinationX1Coordinate = dst_x;
220       xyblt.DestinationY1Coordinate = dst_y;
221       xyblt.DestinationX2Coordinate = dst_x2;
222       xyblt.DestinationY2Coordinate = dst_y2;
223       xyblt.DestinationPitch = dst_pitch;
224       xyblt.SourceX1Coordinate = src_x;
225       xyblt.SourceY1Coordinate = src_y;
226       xyblt.SourcePitch = src_pitch;
227    };
228 
229    crocus_emit_mi_flush(batch);
230    return true;
231 }
232 
crocus_emit_blt(struct crocus_batch * batch,struct crocus_resource * src,struct crocus_resource * dst,unsigned dst_level,unsigned dst_x,unsigned dst_y,unsigned dst_z,unsigned src_level,const struct pipe_box * src_box)233 static bool crocus_emit_blt(struct crocus_batch *batch,
234                             struct crocus_resource *src,
235                             struct crocus_resource *dst,
236                             unsigned dst_level,
237                             unsigned dst_x, unsigned dst_y,
238                             unsigned dst_z,
239                             unsigned src_level,
240                             const struct pipe_box *src_box)
241 {
242    const struct isl_format_layout *src_fmtl = isl_format_get_layout(src->surf.format);
243    unsigned src_cpp = src_fmtl->bpb / 8;
244    const struct isl_format_layout *dst_fmtl = isl_format_get_layout(dst->surf.format);
245    const unsigned dst_cpp = dst_fmtl->bpb / 8;
246    uint16_t src_x, src_y;
247    uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
248    uint32_t src_width = src_box->width, src_height = src_box->height;
249 
250    /* gen4/5 can't handle Y tiled blits. */
251    if (src->surf.tiling == ISL_TILING_Y0 || dst->surf.tiling == ISL_TILING_Y0)
252       return false;
253 
254    if (src->surf.format != dst->surf.format)
255       return false;
256 
257    if (src_cpp != dst_cpp)
258       return false;
259 
260    src_x = src_box->x;
261    src_y = src_box->y;
262 
263    assert(src_cpp == dst_cpp);
264 
265    crocus_resource_get_image_offset(src, src_level, src_box->z, &src_image_x,
266                                     &src_image_y);
267    if (util_format_is_compressed(src->base.b.format)) {
268       int bw = util_format_get_blockwidth(src->base.b.format);
269       int bh = util_format_get_blockheight(src->base.b.format);
270       assert(src_x % bw == 0);
271       assert(src_y % bh == 0);
272       src_x /= (int)bw;
273       src_y /= (int)bh;
274       src_width = DIV_ROUND_UP(src_width, (int)bw);
275       src_height = DIV_ROUND_UP(src_height, (int)bh);
276    }
277 
278    crocus_resource_get_image_offset(dst, dst_level, dst_z, &dst_image_x,
279                                     &dst_image_y);
280    if (util_format_is_compressed(dst->base.b.format)) {
281       int bw = util_format_get_blockwidth(dst->base.b.format);
282       int bh = util_format_get_blockheight(dst->base.b.format);
283       assert(dst_x % bw == 0);
284       assert(dst_y % bh == 0);
285       dst_x /= (int)bw;
286       dst_y /= (int)bh;
287    }
288    src_x += src_image_x;
289    src_y += src_image_y;
290    dst_x += dst_image_x;
291    dst_y += dst_image_y;
292 
293    /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
294     * Data Size Limitations):
295     *
296     *    The BLT engine is capable of transferring very large quantities of
297     *    graphics data. Any graphics data read from and written to the
298     *    destination is permitted to represent a number of pixels that
299     *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
300     *    at the destination. The maximum number of pixels that may be
301     *    represented per scan line’s worth of graphics data depends on the
302     *    color depth.
303     *
304     * The blitter's pitch is a signed 16-bit integer, but measured in bytes
305     * for linear surfaces and DWords for tiled surfaces.  So the maximum
306     * pitch is 32k linear and 128k tiled.
307     */
308    if (crocus_resource_blt_pitch(src) >= 32768 ||
309        crocus_resource_blt_pitch(dst) >= 32768) {
310       return false;
311    }
312 
313    /* We need to split the blit into chunks that each fit within the blitter's
314     * restrictions.  We can't use a chunk size of 32768 because we need to
315     * ensure that src_tile_x + chunk_size fits.  We choose 16384 because it's
316     * a nice round power of two, big enough that performance won't suffer, and
317     * small enough to guarantee everything fits.
318     */
319    const uint32_t max_chunk_size = 16384;
320 
321    for (uint32_t chunk_x = 0; chunk_x < src_width; chunk_x += max_chunk_size) {
322       for (uint32_t chunk_y = 0; chunk_y < src_height; chunk_y += max_chunk_size) {
323          const uint32_t chunk_w = MIN2(max_chunk_size, src_width - chunk_x);
324          const uint32_t chunk_h = MIN2(max_chunk_size, src_height - chunk_y);
325 
326          uint64_t src_offset;
327          uint32_t src_tile_x, src_tile_y;
328          ASSERTED uint32_t z_offset_el, array_offset;
329          isl_tiling_get_intratile_offset_el(src->surf.tiling, src->surf.dim,
330                                             src->surf.msaa_layout,
331                                             src_cpp * 8, src->surf.samples,
332                                             src->surf.row_pitch_B,
333                                             src->surf.array_pitch_el_rows,
334                                             src_x + chunk_x, src_y + chunk_y, 0, 0,
335                                             &src_offset,
336                                             &src_tile_x, &src_tile_y,
337                                             &z_offset_el, &array_offset);
338          assert(z_offset_el == 0);
339          assert(array_offset == 0);
340 
341          uint64_t dst_offset;
342          uint32_t dst_tile_x, dst_tile_y;
343          isl_tiling_get_intratile_offset_el(dst->surf.tiling, dst->surf.dim,
344                                             dst->surf.msaa_layout,
345                                             dst_cpp * 8, dst->surf.samples,
346                                             dst->surf.row_pitch_B,
347                                             dst->surf.array_pitch_el_rows,
348                                             dst_x + chunk_x, dst_y + chunk_y, 0, 0,
349                                             &dst_offset,
350                                             &dst_tile_x, &dst_tile_y,
351                                             &z_offset_el, &array_offset);
352          assert(z_offset_el == 0);
353          assert(array_offset == 0);
354          if (!emit_copy_blt(batch, src, dst,
355                             src_cpp, src->surf.row_pitch_B,
356                             src_offset,
357                             dst->surf.row_pitch_B, dst_offset,
358                             src_tile_x, src_tile_y,
359                             dst_tile_x, dst_tile_y,
360                             chunk_w, chunk_h)) {
361             return false;
362          }
363       }
364    }
365 
366    if (util_format_has_alpha1(src->base.b.format) &&
367        util_format_has_alpha(dst->base.b.format))
368       blt_set_alpha_to_one(batch, dst, 0, 0, src_width, src_height);
369    return true;
370 }
371 
crocus_blit_blt(struct crocus_batch * batch,const struct pipe_blit_info * info)372 static bool crocus_blit_blt(struct crocus_batch *batch,
373                             const struct pipe_blit_info *info)
374 {
375    if (!validate_blit_for_blt(batch, info))
376       return false;
377 
378    return crocus_emit_blt(batch,
379                           (struct crocus_resource *)info->src.resource,
380                           (struct crocus_resource *)info->dst.resource,
381                           info->dst.level,
382                           info->dst.box.x,
383                           info->dst.box.y,
384                           info->dst.box.z,
385                           info->src.level,
386                           &info->src.box);
387 }
388 
389 
crocus_copy_region_blt(struct crocus_batch * batch,struct crocus_resource * dst,unsigned dst_level,unsigned dstx,unsigned dsty,unsigned dstz,struct crocus_resource * src,unsigned src_level,const struct pipe_box * src_box)390 static bool crocus_copy_region_blt(struct crocus_batch *batch,
391                                    struct crocus_resource *dst,
392                                    unsigned dst_level,
393                                    unsigned dstx, unsigned dsty, unsigned dstz,
394                                    struct crocus_resource *src,
395                                    unsigned src_level,
396                                    const struct pipe_box *src_box)
397 {
398    if (dst->base.b.target == PIPE_BUFFER || src->base.b.target == PIPE_BUFFER)
399       return false;
400    return crocus_emit_blt(batch,
401                           src,
402                           dst,
403                           dst_level,
404                           dstx, dsty, dstz,
405                           src_level,
406                           src_box);
407 }
408 #endif
409 
410 void
genX(crocus_init_blt)411 genX(crocus_init_blt)(struct crocus_screen *screen)
412 {
413 #if GFX_VER <= 5
414    screen->vtbl.blit_blt = crocus_blit_blt;
415    screen->vtbl.copy_region_blt = crocus_copy_region_blt;
416 #else
417    screen->vtbl.blit_blt = NULL;
418    screen->vtbl.copy_region_blt = NULL;
419 #endif
420 }
421