• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Mesa 3-D graphics library
3  *
4  * Copyright (C) 2014 LunarG, Inc.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  *
24  * Authors:
25  *    Chia-I Wu <olv@lunarg.com>
26  */
27 
28 #include "ilo_debug.h"
29 #include "ilo_image.h"
30 
31 enum {
32    IMAGE_TILING_NONE = 1 << GEN6_TILING_NONE,
33    IMAGE_TILING_X    = 1 << GEN6_TILING_X,
34    IMAGE_TILING_Y    = 1 << GEN6_TILING_Y,
35    IMAGE_TILING_W    = 1 << GEN8_TILING_W,
36 
37    IMAGE_TILING_ALL  = (IMAGE_TILING_NONE |
38                         IMAGE_TILING_X |
39                         IMAGE_TILING_Y |
40                         IMAGE_TILING_W)
41 };
42 
43 struct ilo_image_layout {
44    enum ilo_image_walk_type walk;
45    bool interleaved_samples;
46 
47    uint8_t valid_tilings;
48    enum gen_surface_tiling tiling;
49 
50    enum ilo_image_aux_type aux;
51 
52    int align_i;
53    int align_j;
54 
55    struct ilo_image_lod *lods;
56    int walk_layer_h0;
57    int walk_layer_h1;
58    int walk_layer_height;
59    int monolithic_width;
60    int monolithic_height;
61 };
62 
63 static enum ilo_image_walk_type
image_get_gen6_walk(const struct ilo_dev * dev,const struct ilo_image_info * info)64 image_get_gen6_walk(const struct ilo_dev *dev,
65                     const struct ilo_image_info *info)
66 {
67    ILO_DEV_ASSERT(dev, 6, 6);
68 
69    /* TODO we want LODs to be page-aligned */
70    if (info->type == GEN6_SURFTYPE_3D)
71       return ILO_IMAGE_WALK_3D;
72 
73    /*
74     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
75     *
76     *     "The separate stencil buffer does not support mip mapping, thus the
77     *      storage for LODs other than LOD 0 is not needed. The following
78     *      QPitch equation applies only to the separate stencil buffer:
79     *
80     *        QPitch = h_0"
81     *
82     * Use ILO_IMAGE_WALK_LOD and manually offset to the (page-aligned) levels
83     * when bound.
84     */
85    if (info->bind_zs && info->format == GEN6_FORMAT_R8_UINT)
86       return ILO_IMAGE_WALK_LOD;
87 
88    /* compact spacing is not supported otherwise */
89    return ILO_IMAGE_WALK_LAYER;
90 }
91 
92 static enum ilo_image_walk_type
image_get_gen7_walk(const struct ilo_dev * dev,const struct ilo_image_info * info)93 image_get_gen7_walk(const struct ilo_dev *dev,
94                     const struct ilo_image_info *info)
95 {
96    ILO_DEV_ASSERT(dev, 7, 8);
97 
98    if (info->type == GEN6_SURFTYPE_3D)
99       return ILO_IMAGE_WALK_3D;
100 
101    /*
102     * From the Ivy Bridge PRM, volume 1 part 1, page 111:
103     *
104     *     "note that the depth buffer and stencil buffer have an implied value
105     *      of ARYSPC_FULL"
106     *
107     * From the Ivy Bridge PRM, volume 4 part 1, page 66:
108     *
109     *     "If Multisampled Surface Storage Format is MSFMT_MSS and Number of
110     *      Multisamples is not MULTISAMPLECOUNT_1, this field (Surface Array
111     *      Spacing) must be set to ARYSPC_LOD0."
112     */
113    if (info->sample_count > 1)
114       assert(info->level_count == 1);
115    return (info->bind_zs || info->level_count > 1) ?
116       ILO_IMAGE_WALK_LAYER : ILO_IMAGE_WALK_LOD;
117 }
118 
119 static bool
image_get_gen6_interleaved_samples(const struct ilo_dev * dev,const struct ilo_image_info * info)120 image_get_gen6_interleaved_samples(const struct ilo_dev *dev,
121                                    const struct ilo_image_info *info)
122 {
123    ILO_DEV_ASSERT(dev, 6, 8);
124 
125    /*
126     * Gen6 supports only interleaved samples.  It is not explicitly stated,
127     * but on Gen7+, render targets are expected to be UMS/CMS (samples
128     * non-interleaved) and depth/stencil buffers are expected to be IMS
129     * (samples interleaved).
130     *
131     * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
132     */
133    return (ilo_dev_gen(dev) == ILO_GEN(6) || info->bind_zs);
134 }
135 
136 static uint8_t
image_get_gen6_valid_tilings(const struct ilo_dev * dev,const struct ilo_image_info * info)137 image_get_gen6_valid_tilings(const struct ilo_dev *dev,
138                              const struct ilo_image_info *info)
139 {
140    uint8_t valid_tilings = IMAGE_TILING_ALL;
141 
142    ILO_DEV_ASSERT(dev, 6, 8);
143 
144    if (info->valid_tilings)
145       valid_tilings &= info->valid_tilings;
146 
147    /*
148     * From the Sandy Bridge PRM, volume 1 part 2, page 32:
149     *
150     *     "Display/Overlay   Y-Major not supported.
151     *                        X-Major required for Async Flips"
152     */
153    if (unlikely(info->bind_scanout))
154       valid_tilings &= IMAGE_TILING_X;
155 
156    /*
157     * From the Sandy Bridge PRM, volume 3 part 2, page 158:
158     *
159     *     "The cursor surface address must be 4K byte aligned. The cursor must
160     *      be in linear memory, it cannot be tiled."
161     */
162    if (unlikely(info->bind_cursor))
163       valid_tilings &= IMAGE_TILING_NONE;
164 
165    /*
166     * From the Sandy Bridge PRM, volume 2 part 1, page 318:
167     *
168     *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
169     *      Depth Buffer is not supported."
170     *
171     *     "The Depth Buffer, if tiled, must use Y-Major tiling."
172     *
173     * From the Sandy Bridge PRM, volume 1 part 2, page 22:
174     *
175     *     "W-Major Tile Format is used for separate stencil."
176     */
177    if (info->bind_zs) {
178       if (info->format == GEN6_FORMAT_R8_UINT)
179          valid_tilings &= IMAGE_TILING_W;
180       else
181          valid_tilings &= IMAGE_TILING_Y;
182    }
183 
184    if (info->bind_surface_sampler ||
185        info->bind_surface_dp_render ||
186        info->bind_surface_dp_typed) {
187       /*
188        * From the Haswell PRM, volume 2d, page 233:
189        *
190        *     "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
191        *      (Tiled Surface) must be TRUE."
192        */
193       if (info->sample_count > 1)
194          valid_tilings &= ~IMAGE_TILING_NONE;
195 
196       if (ilo_dev_gen(dev) < ILO_GEN(8))
197          valid_tilings &= ~IMAGE_TILING_W;
198    }
199 
200    if (info->bind_surface_dp_render) {
201       /*
202        * From the Sandy Bridge PRM, volume 1 part 2, page 32:
203        *
204        *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
205        *      either TileX or Linear."
206        *
207        * From the Haswell PRM, volume 5, page 32:
208        *
209        *     "NOTE: 128 BPP format color buffer (render target) supports
210        *      Linear, TiledX and TiledY."
211        */
212       if (ilo_dev_gen(dev) < ILO_GEN(7.5) && info->block_size == 16)
213          valid_tilings &= ~IMAGE_TILING_Y;
214 
215       /*
216        * From the Ivy Bridge PRM, volume 4 part 1, page 63:
217        *
218        *     "This field (Surface Vertical Aligment) must be set to VALIGN_4
219        *      for all tiled Y Render Target surfaces."
220        *
221        *     "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
222        *
223        * R32G32B32_FLOAT is not renderable and we only need an assert() here.
224        */
225       if (ilo_dev_gen(dev) >= ILO_GEN(7) && ilo_dev_gen(dev) <= ILO_GEN(7.5))
226          assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
227    }
228 
229    return valid_tilings;
230 }
231 
232 static uint64_t
image_get_gen6_estimated_size(const struct ilo_dev * dev,const struct ilo_image_info * info)233 image_get_gen6_estimated_size(const struct ilo_dev *dev,
234                               const struct ilo_image_info *info)
235 {
236    /* padding not considered */
237    const uint64_t slice_size = info->width * info->height *
238       info->block_size / (info->block_width * info->block_height);
239    const uint64_t slice_count =
240       info->depth * info->array_size * info->sample_count;
241    const uint64_t estimated_size = slice_size * slice_count;
242 
243    ILO_DEV_ASSERT(dev, 6, 8);
244 
245    if (info->level_count == 1)
246       return estimated_size;
247    else
248       return estimated_size * 4 / 3;
249 }
250 
251 static enum gen_surface_tiling
image_get_gen6_tiling(const struct ilo_dev * dev,const struct ilo_image_info * info,uint8_t valid_tilings)252 image_get_gen6_tiling(const struct ilo_dev *dev,
253                       const struct ilo_image_info *info,
254                       uint8_t valid_tilings)
255 {
256    ILO_DEV_ASSERT(dev, 6, 8);
257 
258    switch (valid_tilings) {
259    case IMAGE_TILING_NONE:
260       return GEN6_TILING_NONE;
261    case IMAGE_TILING_X:
262       return GEN6_TILING_X;
263    case IMAGE_TILING_Y:
264       return GEN6_TILING_Y;
265    case IMAGE_TILING_W:
266       return GEN8_TILING_W;
267    default:
268       break;
269    }
270 
271    /*
272     * X-tiling has the property that vertically adjacent pixels are usually in
273     * the same page.  When the image size is less than a page, the image
274     * height is 1, or when the image is not accessed in blocks, there is no
275     * reason to tile.
276     *
277     * Y-tiling is similar, where vertically adjacent pixels are usually in the
278     * same cacheline.
279     */
280    if (valid_tilings & IMAGE_TILING_NONE) {
281       const uint64_t estimated_size =
282          image_get_gen6_estimated_size(dev, info);
283 
284       if (info->height == 1 || !(info->bind_surface_sampler ||
285                                  info->bind_surface_dp_render ||
286                                  info->bind_surface_dp_typed))
287          return GEN6_TILING_NONE;
288 
289       if (estimated_size <= 64 || (info->prefer_linear_threshold &&
290                estimated_size > info->prefer_linear_threshold))
291          return GEN6_TILING_NONE;
292 
293       if (estimated_size <= 2048)
294          valid_tilings &= ~IMAGE_TILING_X;
295    }
296 
297    return (valid_tilings & IMAGE_TILING_Y) ? GEN6_TILING_Y :
298           (valid_tilings & IMAGE_TILING_X) ? GEN6_TILING_X :
299           GEN6_TILING_NONE;
300 }
301 
302 static bool
image_get_gen6_hiz_enable(const struct ilo_dev * dev,const struct ilo_image_info * info)303 image_get_gen6_hiz_enable(const struct ilo_dev *dev,
304                           const struct ilo_image_info *info)
305 {
306    ILO_DEV_ASSERT(dev, 6, 8);
307 
308    /* depth buffer? */
309    if (!info->bind_zs ||
310        info->format == GEN6_FORMAT_R8_UINT ||
311        info->interleaved_stencil)
312       return false;
313 
314    /* we want to be able to force 8x4 alignments */
315    if (info->type == GEN6_SURFTYPE_1D)
316       return false;
317 
318    if (info->aux_disable)
319       return false;
320 
321    if (ilo_debug & ILO_DEBUG_NOHIZ)
322       return false;
323 
324    return true;
325 }
326 
327 static bool
image_get_gen7_mcs_enable(const struct ilo_dev * dev,const struct ilo_image_info * info,enum gen_surface_tiling tiling)328 image_get_gen7_mcs_enable(const struct ilo_dev *dev,
329                           const struct ilo_image_info *info,
330                           enum gen_surface_tiling tiling)
331 {
332    ILO_DEV_ASSERT(dev, 7, 8);
333 
334    if (!info->bind_surface_sampler && !info->bind_surface_dp_render)
335       return false;
336 
337    /*
338     * From the Ivy Bridge PRM, volume 4 part 1, page 77:
339     *
340     *     "For Render Target and Sampling Engine Surfaces:If the surface is
341     *      multisampled (Number of Multisamples any value other than
342     *      MULTISAMPLECOUNT_1), this field (MCS Enable) must be enabled."
343     *
344     *     "This field must be set to 0 for all SINT MSRTs when all RT channels
345     *      are not written"
346     */
347    if (info->sample_count > 1) {
348       if (ilo_dev_gen(dev) < ILO_GEN(8))
349          assert(!info->is_integer);
350       return true;
351    }
352 
353    if (info->aux_disable)
354       return false;
355 
356    /*
357     * From the Ivy Bridge PRM, volume 2 part 1, page 326:
358     *
359     *     "When MCS is buffer is used for color clear of non-multisampler
360     *      render target, the following restrictions apply.
361     *      - Support is limited to tiled render targets.
362     *      - Support is for non-mip-mapped and non-array surface types only.
363     *      - Clear is supported only on the full RT; i.e., no partial clear or
364     *        overlapping clears.
365     *      - MCS buffer for non-MSRT is supported only for RT formats 32bpp,
366     *        64bpp and 128bpp.
367     *      ..."
368     *
369     * How about SURFTYPE_3D?
370     */
371    if (!info->bind_surface_dp_render ||
372        tiling == GEN6_TILING_NONE ||
373        info->level_count > 1 ||
374        info->array_size > 1)
375       return false;
376 
377    switch (info->block_size) {
378    case 4:
379    case 8:
380    case 16:
381       return true;
382    default:
383       return false;
384    }
385 }
386 
387 static void
image_get_gen6_alignments(const struct ilo_dev * dev,const struct ilo_image_info * info,int * align_i,int * align_j)388 image_get_gen6_alignments(const struct ilo_dev *dev,
389                           const struct ilo_image_info *info,
390                           int *align_i, int *align_j)
391 {
392    ILO_DEV_ASSERT(dev, 6, 6);
393 
394    /*
395     * From the Sandy Bridge PRM, volume 1 part 1, page 113:
396     *
397     *     "surface format           align_i     align_j
398     *      YUV 4:2:2 formats        4           *see below
399     *      BC1-5                    4           4
400     *      FXT1                     8           4
401     *      all other formats        4           *see below"
402     *
403     *     "- align_j = 4 for any depth buffer
404     *      - align_j = 2 for separate stencil buffer
405     *      - align_j = 4 for any render target surface is multisampled (4x)
406     *      - align_j = 4 for any render target surface with Surface Vertical
407     *        Alignment = VALIGN_4
408     *      - align_j = 2 for any render target surface with Surface Vertical
409     *        Alignment = VALIGN_2
410     *      - align_j = 2 for all other render target surface
411     *      - align_j = 2 for any sampling engine surface with Surface Vertical
412     *        Alignment = VALIGN_2
413     *      - align_j = 4 for any sampling engine surface with Surface Vertical
414     *        Alignment = VALIGN_4"
415     *
416     * From the Sandy Bridge PRM, volume 4 part 1, page 86:
417     *
418     *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
419     *      the Surface Format is 96 bits per element (BPE)."
420     *
421     * They can be rephrased as
422     *
423     *                                  align_i        align_j
424     *   compressed formats             block width    block height
425     *   GEN6_FORMAT_R8_UINT            4              2
426     *   other depth/stencil formats    4              4
427     *   4x multisampled                4              4
428     *   bpp 96                         4              2
429     *   others                         4              2 or 4
430     */
431 
432    *align_i = (info->compressed) ? info->block_width : 4;
433    if (info->compressed) {
434       *align_j = info->block_height;
435    } else if (info->bind_zs) {
436       *align_j = (info->format == GEN6_FORMAT_R8_UINT) ? 2 : 4;
437    } else {
438       *align_j = (info->sample_count > 1 || info->block_size != 12) ? 4 : 2;
439    }
440 }
441 
442 static void
image_get_gen7_alignments(const struct ilo_dev * dev,const struct ilo_image_info * info,enum gen_surface_tiling tiling,int * align_i,int * align_j)443 image_get_gen7_alignments(const struct ilo_dev *dev,
444                           const struct ilo_image_info *info,
445                           enum gen_surface_tiling tiling,
446                           int *align_i, int *align_j)
447 {
448    int i, j;
449 
450    ILO_DEV_ASSERT(dev, 7, 8);
451 
452    /*
453     * From the Ivy Bridge PRM, volume 1 part 1, page 110:
454     *
455     *     "surface defined by      surface format     align_i     align_j
456     *      3DSTATE_DEPTH_BUFFER    D16_UNORM          8           4
457     *                              not D16_UNORM      4           4
458     *      3DSTATE_STENCIL_BUFFER  N/A                8           8
459     *      SURFACE_STATE           BC*, ETC*, EAC*    4           4
460     *                              FXT1               8           4
461     *                              all others         (set by SURFACE_STATE)"
462     *
463     * From the Ivy Bridge PRM, volume 4 part 1, page 63:
464     *
465     *     "- This field (Surface Vertical Aligment) is intended to be set to
466     *        VALIGN_4 if the surface was rendered as a depth buffer, for a
467     *        multisampled (4x) render target, or for a multisampled (8x)
468     *        render target, since these surfaces support only alignment of 4.
469     *      - Use of VALIGN_4 for other surfaces is supported, but uses more
470     *        memory.
471     *      - This field must be set to VALIGN_4 for all tiled Y Render Target
472     *        surfaces.
473     *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
474     *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
475     *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
476     *        must be set to VALIGN_4."
477     *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
478     *
479     *     "- This field (Surface Horizontal Aligment) is intended to be set to
480     *        HALIGN_8 only if the surface was rendered as a depth buffer with
481     *        Z16 format or a stencil buffer, since these surfaces support only
482     *        alignment of 8.
483     *      - Use of HALIGN_8 for other surfaces is supported, but uses more
484     *        memory.
485     *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
486     *      - This field must be set to HALIGN_8 if the Surface Format is
487     *        FXT1."
488     *
489     * They can be rephrased as
490     *
491     *                                  align_i        align_j
492     *  compressed formats              block width    block height
493     *  GEN6_FORMAT_R16_UNORM           8              4
494     *  GEN6_FORMAT_R8_UINT             8              8
495     *  other depth/stencil formats     4              4
496     *  2x or 4x multisampled           4 or 8         4
497     *  tiled Y                         4 or 8         4 (if rt)
498     *  GEN6_FORMAT_R32G32B32_FLOAT     4 or 8         2
499     *  others                          4 or 8         2 or 4
500     */
501    if (info->compressed) {
502       i = info->block_width;
503       j = info->block_height;
504    } else if (info->bind_zs) {
505       switch (info->format) {
506       case GEN6_FORMAT_R16_UNORM:
507          i = 8;
508          j = 4;
509          break;
510       case GEN6_FORMAT_R8_UINT:
511          i = 8;
512          j = 8;
513          break;
514       default:
515          i = 4;
516          j = 4;
517          break;
518       }
519    } else {
520       const bool valign_4 =
521          (info->sample_count > 1 || ilo_dev_gen(dev) >= ILO_GEN(8) ||
522           (tiling == GEN6_TILING_Y && info->bind_surface_dp_render));
523 
524       if (ilo_dev_gen(dev) < ILO_GEN(8) && valign_4)
525          assert(info->format != GEN6_FORMAT_R32G32B32_FLOAT);
526 
527       i = 4;
528       j = (valign_4) ? 4 : 2;
529    }
530 
531    *align_i = i;
532    *align_j = j;
533 }
534 
535 static bool
image_init_gen6_hardware_layout(const struct ilo_dev * dev,const struct ilo_image_info * info,struct ilo_image_layout * layout)536 image_init_gen6_hardware_layout(const struct ilo_dev *dev,
537                                 const struct ilo_image_info *info,
538                                 struct ilo_image_layout *layout)
539 {
540    ILO_DEV_ASSERT(dev, 6, 8);
541 
542    if (ilo_dev_gen(dev) >= ILO_GEN(7))
543       layout->walk = image_get_gen7_walk(dev, info);
544    else
545       layout->walk = image_get_gen6_walk(dev, info);
546 
547    layout->interleaved_samples =
548       image_get_gen6_interleaved_samples(dev, info);
549 
550    layout->valid_tilings = image_get_gen6_valid_tilings(dev, info);
551    if (!layout->valid_tilings)
552       return false;
553 
554    layout->tiling = image_get_gen6_tiling(dev, info, layout->valid_tilings);
555 
556    if (image_get_gen6_hiz_enable(dev, info))
557       layout->aux = ILO_IMAGE_AUX_HIZ;
558    else if (ilo_dev_gen(dev) >= ILO_GEN(7) &&
559             image_get_gen7_mcs_enable(dev, info, layout->tiling))
560       layout->aux = ILO_IMAGE_AUX_MCS;
561    else
562       layout->aux = ILO_IMAGE_AUX_NONE;
563 
564    if (ilo_dev_gen(dev) >= ILO_GEN(7)) {
565       image_get_gen7_alignments(dev, info, layout->tiling,
566             &layout->align_i, &layout->align_j);
567    } else {
568       image_get_gen6_alignments(dev, info,
569             &layout->align_i, &layout->align_j);
570    }
571 
572    return true;
573 }
574 
575 static bool
image_init_gen6_transfer_layout(const struct ilo_dev * dev,const struct ilo_image_info * info,struct ilo_image_layout * layout)576 image_init_gen6_transfer_layout(const struct ilo_dev *dev,
577                                 const struct ilo_image_info *info,
578                                 struct ilo_image_layout *layout)
579 {
580    ILO_DEV_ASSERT(dev, 6, 8);
581 
582    /* we can define our own layout to save space */
583    layout->walk = ILO_IMAGE_WALK_LOD;
584    layout->interleaved_samples = false;
585    layout->valid_tilings = IMAGE_TILING_NONE;
586    layout->tiling = GEN6_TILING_NONE;
587    layout->aux = ILO_IMAGE_AUX_NONE;
588    layout->align_i = info->block_width;
589    layout->align_j = info->block_height;
590 
591    return true;
592 }
593 
594 static void
image_get_gen6_slice_size(const struct ilo_dev * dev,const struct ilo_image_info * info,const struct ilo_image_layout * layout,uint8_t level,int * width,int * height)595 image_get_gen6_slice_size(const struct ilo_dev *dev,
596                           const struct ilo_image_info *info,
597                           const struct ilo_image_layout *layout,
598                           uint8_t level,
599                           int *width, int *height)
600 {
601    int w, h;
602 
603    ILO_DEV_ASSERT(dev, 6, 8);
604 
605    w = u_minify(info->width, level);
606    h = u_minify(info->height, level);
607 
608    /*
609     * From the Sandy Bridge PRM, volume 1 part 1, page 114:
610     *
611     *     "The dimensions of the mip maps are first determined by applying the
612     *      sizing algorithm presented in Non-Power-of-Two Mipmaps above. Then,
613     *      if necessary, they are padded out to compression block boundaries."
614     */
615    w = align(w, info->block_width);
616    h = align(h, info->block_height);
617 
618    /*
619     * From the Sandy Bridge PRM, volume 1 part 1, page 111:
620     *
621     *     "If the surface is multisampled (4x), these values must be adjusted
622     *      as follows before proceeding:
623     *
624     *        W_L = ceiling(W_L / 2) * 4
625     *        H_L = ceiling(H_L / 2) * 4"
626     *
627     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
628     *
629     *     "If the surface is multisampled and it is a depth or stencil surface
630     *      or Multisampled Surface StorageFormat in SURFACE_STATE is
631     *      MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows before
632     *      proceeding:
633     *
634     *        #samples  W_L =                    H_L =
635     *        2         ceiling(W_L / 2) * 4     HL [no adjustment]
636     *        4         ceiling(W_L / 2) * 4     ceiling(H_L / 2) * 4
637     *        8         ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 4
638     *        16        ceiling(W_L / 2) * 8     ceiling(H_L / 2) * 8"
639     *
640     * For interleaved samples (4x), where pixels
641     *
642     *   (x, y  ) (x+1, y  )
643     *   (x, y+1) (x+1, y+1)
644     *
645     * would be is occupied by
646     *
647     *   (x, y  , si0) (x+1, y  , si0) (x, y  , si1) (x+1, y  , si1)
648     *   (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
649     *   (x, y  , si2) (x+1, y  , si2) (x, y  , si3) (x+1, y  , si3)
650     *   (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
651     *
652     * Thus the need to
653     *
654     *   w = align(w, 2) * 2;
655     *   y = align(y, 2) * 2;
656     */
657    if (layout->interleaved_samples) {
658       switch (info->sample_count) {
659       case 1:
660          break;
661       case 2:
662          w = align(w, 2) * 2;
663          break;
664       case 4:
665          w = align(w, 2) * 2;
666          h = align(h, 2) * 2;
667          break;
668       case 8:
669          w = align(w, 2) * 4;
670          h = align(h, 2) * 2;
671          break;
672       case 16:
673          w = align(w, 2) * 4;
674          h = align(h, 2) * 4;
675          break;
676       default:
677          assert(!"unsupported sample count");
678          break;
679       }
680    }
681 
682    /*
683     * From the Ivy Bridge PRM, volume 1 part 1, page 108:
684     *
685     *     "For separate stencil buffer, the width must be mutiplied by 2 and
686     *      height divided by 2..."
687     *
688     * To make things easier (for transfer), we will just double the stencil
689     * stride in 3DSTATE_STENCIL_BUFFER.
690     */
691    w = align(w, layout->align_i);
692    h = align(h, layout->align_j);
693 
694    *width = w;
695    *height = h;
696 }
697 
698 static int
image_get_gen6_layer_count(const struct ilo_dev * dev,const struct ilo_image_info * info,const struct ilo_image_layout * layout)699 image_get_gen6_layer_count(const struct ilo_dev *dev,
700                            const struct ilo_image_info *info,
701                            const struct ilo_image_layout *layout)
702 {
703    int count = info->array_size;
704 
705    ILO_DEV_ASSERT(dev, 6, 8);
706 
707    /* samples of the same index are stored in a layer */
708    if (!layout->interleaved_samples)
709       count *= info->sample_count;
710 
711    return count;
712 }
713 
714 static void
image_get_gen6_walk_layer_heights(const struct ilo_dev * dev,const struct ilo_image_info * info,struct ilo_image_layout * layout)715 image_get_gen6_walk_layer_heights(const struct ilo_dev *dev,
716                                   const struct ilo_image_info *info,
717                                   struct ilo_image_layout *layout)
718 {
719    ILO_DEV_ASSERT(dev, 6, 8);
720 
721    layout->walk_layer_h0 = layout->lods[0].slice_height;
722 
723    if (info->level_count > 1) {
724       layout->walk_layer_h1 = layout->lods[1].slice_height;
725    } else {
726       int dummy;
727       image_get_gen6_slice_size(dev, info, layout, 1,
728             &dummy, &layout->walk_layer_h1);
729    }
730 
731    if (image_get_gen6_layer_count(dev, info, layout) == 1) {
732       layout->walk_layer_height = 0;
733       return;
734    }
735 
736    /*
737     * From the Sandy Bridge PRM, volume 1 part 1, page 115:
738     *
739     *     "The following equation is used for surface formats other than
740     *      compressed textures:
741     *
742     *        QPitch = (h0 + h1 + 11j)"
743     *
744     *     "The equation for compressed textures (BC* and FXT1 surface formats)
745     *      follows:
746     *
747     *        QPitch = (h0 + h1 + 11j) / 4"
748     *
749     *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
750     *      value calculated in the equation above, for every other odd Surface
751     *      Height starting from 1 i.e. 1,5,9,13"
752     *
753     * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
754     *
755     *     "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
756     *      buffer and stencil buffer have an implied value of ARYSPC_FULL):
757     *
758     *        QPitch = (h0 + h1 + 12j)
759     *        QPitch = (h0 + h1 + 12j) / 4 (compressed)
760     *
761     *      (There are many typos or missing words here...)"
762     *
763     * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
764     * the base address.  The PRM divides QPitch by 4 for compressed formats
765     * because the block height for those formats are 4, and it wants QPitch to
766     * mean the number of memory rows, as opposed to texel rows, between
767     * slices.  Since we use texel rows everywhere, we do not need to divide
768     * QPitch by 4.
769     */
770    layout->walk_layer_height = layout->walk_layer_h0 + layout->walk_layer_h1 +
771       ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
772 
773    if (ilo_dev_gen(dev) == ILO_GEN(6) && info->sample_count > 1 &&
774        info->height % 4 == 1)
775       layout->walk_layer_height += 4;
776 }
777 
778 static void
image_get_gen6_monolithic_size(const struct ilo_dev * dev,const struct ilo_image_info * info,struct ilo_image_layout * layout,int max_x,int max_y)779 image_get_gen6_monolithic_size(const struct ilo_dev *dev,
780                                const struct ilo_image_info *info,
781                                struct ilo_image_layout *layout,
782                                int max_x, int max_y)
783 {
784    int align_w = 1, align_h = 1, pad_h = 0;
785 
786    ILO_DEV_ASSERT(dev, 6, 8);
787 
788    /*
789     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
790     *
791     *     "To determine the necessary padding on the bottom and right side of
792     *      the surface, refer to the table in Section 7.18.3.4 for the i and j
793     *      parameters for the surface format in use. The surface must then be
794     *      extended to the next multiple of the alignment unit size in each
795     *      dimension, and all texels contained in this extended surface must
796     *      have valid GTT entries."
797     *
798     *     "For cube surfaces, an additional two rows of padding are required
799     *      at the bottom of the surface. This must be ensured regardless of
800     *      whether the surface is stored tiled or linear.  This is due to the
801     *      potential rotation of cache line orientation from memory to cache."
802     *
803     *     "For compressed textures (BC* and FXT1 surface formats), padding at
804     *      the bottom of the surface is to an even compressed row, which is
805     *      equal to a multiple of 8 uncompressed texel rows. Thus, for padding
806     *      purposes, these surfaces behave as if j = 8 only for surface
807     *      padding purposes. The value of 4 for j still applies for mip level
808     *      alignment and QPitch calculation."
809     */
810    if (info->bind_surface_sampler) {
811       align_w = MAX2(align_w, layout->align_i);
812       align_h = MAX2(align_h, layout->align_j);
813 
814       if (info->type == GEN6_SURFTYPE_CUBE)
815          pad_h += 2;
816 
817       if (info->compressed)
818          align_h = MAX2(align_h, layout->align_j * 2);
819    }
820 
821    /*
822     * From the Sandy Bridge PRM, volume 1 part 1, page 118:
823     *
824     *     "If the surface contains an odd number of rows of data, a final row
825     *      below the surface must be allocated."
826     */
827    if (info->bind_surface_dp_render)
828       align_h = MAX2(align_h, 2);
829 
830    /*
831     * Depth Buffer Clear/Resolve works in 8x4 sample blocks.  Pad to allow HiZ
832     * for unaligned non-mipmapped and non-array images.
833     */
834    if (layout->aux == ILO_IMAGE_AUX_HIZ &&
835        info->level_count == 1 && info->array_size == 1 && info->depth == 1) {
836       align_w = MAX2(align_w, 8);
837       align_h = MAX2(align_h, 4);
838    }
839 
840    layout->monolithic_width = align(max_x, align_w);
841    layout->monolithic_height = align(max_y + pad_h, align_h);
842 }
843 
844 static void
image_get_gen6_lods(const struct ilo_dev * dev,const struct ilo_image_info * info,struct ilo_image_layout * layout)845 image_get_gen6_lods(const struct ilo_dev *dev,
846                     const struct ilo_image_info *info,
847                     struct ilo_image_layout *layout)
848 {
849    const int layer_count = image_get_gen6_layer_count(dev, info, layout);
850    int cur_x, cur_y, max_x, max_y;
851    uint8_t lv;
852 
853    ILO_DEV_ASSERT(dev, 6, 8);
854 
855    cur_x = 0;
856    cur_y = 0;
857    max_x = 0;
858    max_y = 0;
859    for (lv = 0; lv < info->level_count; lv++) {
860       int slice_w, slice_h, lod_w, lod_h;
861 
862       image_get_gen6_slice_size(dev, info, layout, lv, &slice_w, &slice_h);
863 
864       layout->lods[lv].x = cur_x;
865       layout->lods[lv].y = cur_y;
866       layout->lods[lv].slice_width = slice_w;
867       layout->lods[lv].slice_height = slice_h;
868 
869       switch (layout->walk) {
870       case ILO_IMAGE_WALK_LAYER:
871          lod_w = slice_w;
872          lod_h = slice_h;
873 
874          /* MIPLAYOUT_BELOW */
875          if (lv == 1)
876             cur_x += lod_w;
877          else
878             cur_y += lod_h;
879          break;
880       case ILO_IMAGE_WALK_LOD:
881          lod_w = slice_w;
882          lod_h = slice_h * layer_count;
883 
884          if (lv == 1)
885             cur_x += lod_w;
886          else
887             cur_y += lod_h;
888 
889          /* every LOD begins at tile boundaries */
890          if (info->level_count > 1) {
891             assert(info->format == GEN6_FORMAT_R8_UINT);
892             cur_x = align(cur_x, 64);
893             cur_y = align(cur_y, 64);
894          }
895          break;
896       case ILO_IMAGE_WALK_3D:
897          {
898             const int slice_count = u_minify(info->depth, lv);
899             const int slice_count_per_row = 1 << lv;
900             const int row_count =
901                (slice_count + slice_count_per_row - 1) / slice_count_per_row;
902 
903             lod_w = slice_w * slice_count_per_row;
904             lod_h = slice_h * row_count;
905          }
906 
907          cur_y += lod_h;
908          break;
909       default:
910          assert(!"unknown walk type");
911          lod_w = 0;
912          lod_h = 0;
913          break;
914       }
915 
916       if (max_x < layout->lods[lv].x + lod_w)
917          max_x = layout->lods[lv].x + lod_w;
918       if (max_y < layout->lods[lv].y + lod_h)
919          max_y = layout->lods[lv].y + lod_h;
920    }
921 
922    if (layout->walk == ILO_IMAGE_WALK_LAYER) {
923       image_get_gen6_walk_layer_heights(dev, info, layout);
924       if (layer_count > 1)
925          max_y += layout->walk_layer_height * (layer_count - 1);
926    } else {
927       layout->walk_layer_h0 = 0;
928       layout->walk_layer_h1 = 0;
929       layout->walk_layer_height = 0;
930    }
931 
932    image_get_gen6_monolithic_size(dev, info, layout, max_x, max_y);
933 }
934 
935 static bool
image_bind_gpu(const struct ilo_image_info * info)936 image_bind_gpu(const struct ilo_image_info *info)
937 {
938    return (info->bind_surface_sampler ||
939            info->bind_surface_dp_render ||
940            info->bind_surface_dp_typed ||
941            info->bind_zs ||
942            info->bind_scanout ||
943            info->bind_cursor);
944 }
945 
946 static bool
image_validate_gen6(const struct ilo_dev * dev,const struct ilo_image_info * info)947 image_validate_gen6(const struct ilo_dev *dev,
948                     const struct ilo_image_info *info)
949 {
950    ILO_DEV_ASSERT(dev, 6, 8);
951 
952    /*
953     * From the Ivy Bridge PRM, volume 2 part 1, page 314:
954     *
955     *     "The separate stencil buffer is always enabled, thus the field in
956     *      3DSTATE_DEPTH_BUFFER to explicitly enable the separate stencil
957     *      buffer has been removed Surface formats with interleaved depth and
958     *      stencil are no longer supported"
959     */
960    if (ilo_dev_gen(dev) >= ILO_GEN(7) && info->bind_zs)
961       assert(!info->interleaved_stencil);
962 
963    return true;
964 }
965 
966 static bool
image_get_gen6_layout(const struct ilo_dev * dev,const struct ilo_image_info * info,struct ilo_image_layout * layout)967 image_get_gen6_layout(const struct ilo_dev *dev,
968                       const struct ilo_image_info *info,
969                       struct ilo_image_layout *layout)
970 {
971    ILO_DEV_ASSERT(dev, 6, 8);
972 
973    if (!image_validate_gen6(dev, info))
974       return false;
975 
976    if (image_bind_gpu(info) || info->level_count > 1) {
977       if (!image_init_gen6_hardware_layout(dev, info, layout))
978          return false;
979    } else {
980       if (!image_init_gen6_transfer_layout(dev, info, layout))
981          return false;
982    }
983 
984    /*
985     * the fact that align i and j are multiples of block width and height
986     * respectively is what makes the size of the bo a multiple of the block
987     * size, slices start at block boundaries, and many of the computations
988     * work.
989     */
990    assert(layout->align_i % info->block_width == 0);
991    assert(layout->align_j % info->block_height == 0);
992 
993    /* make sure align() works */
994    assert(util_is_power_of_two(layout->align_i) &&
995           util_is_power_of_two(layout->align_j));
996    assert(util_is_power_of_two(info->block_width) &&
997           util_is_power_of_two(info->block_height));
998 
999    image_get_gen6_lods(dev, info, layout);
1000 
1001    assert(layout->walk_layer_height % info->block_height == 0);
1002    assert(layout->monolithic_width % info->block_width == 0);
1003    assert(layout->monolithic_height % info->block_height == 0);
1004 
1005    return true;
1006 }
1007 
1008 static bool
image_set_gen6_bo_size(struct ilo_image * img,const struct ilo_dev * dev,const struct ilo_image_info * info,const struct ilo_image_layout * layout)1009 image_set_gen6_bo_size(struct ilo_image *img,
1010                        const struct ilo_dev *dev,
1011                        const struct ilo_image_info *info,
1012                        const struct ilo_image_layout *layout)
1013 {
1014    int stride, height;
1015    int align_w, align_h;
1016 
1017    ILO_DEV_ASSERT(dev, 6, 8);
1018 
1019    stride = (layout->monolithic_width / info->block_width) * info->block_size;
1020    height = layout->monolithic_height / info->block_height;
1021 
1022    /*
1023     * From the Haswell PRM, volume 5, page 163:
1024     *
1025     *     "For linear surfaces, additional padding of 64 bytes is required
1026     *      at the bottom of the surface. This is in addition to the padding
1027     *      required above."
1028     */
1029    if (ilo_dev_gen(dev) >= ILO_GEN(7.5) && info->bind_surface_sampler &&
1030        layout->tiling == GEN6_TILING_NONE)
1031       height += (64 + stride - 1) / stride;
1032 
1033    /*
1034     * From the Sandy Bridge PRM, volume 4 part 1, page 81:
1035     *
1036     *     "- For linear render target surfaces, the pitch must be a multiple
1037     *        of the element size for non-YUV surface formats.  Pitch must be a
1038     *        multiple of 2 * element size for YUV surface formats.
1039     *
1040     *      - For other linear surfaces, the pitch can be any multiple of
1041     *        bytes.
1042     *      - For tiled surfaces, the pitch must be a multiple of the tile
1043     *        width."
1044     *
1045     * Different requirements may exist when the image is used in different
1046     * places, but our alignments here should be good enough that we do not
1047     * need to check info->bind_x.
1048     */
1049    switch (layout->tiling) {
1050    case GEN6_TILING_X:
1051       align_w = 512;
1052       align_h = 8;
1053       break;
1054    case GEN6_TILING_Y:
1055       align_w = 128;
1056       align_h = 32;
1057       break;
1058    case GEN8_TILING_W:
1059       /*
1060        * From the Sandy Bridge PRM, volume 1 part 2, page 22:
1061        *
1062        *     "A 4KB tile is subdivided into 8-high by 8-wide array of
1063        *      Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
1064        *      bytes."
1065        */
1066       align_w = 64;
1067       align_h = 64;
1068       break;
1069    default:
1070       assert(layout->tiling == GEN6_TILING_NONE);
1071       /* some good enough values */
1072       align_w = 64;
1073       align_h = 2;
1074       break;
1075    }
1076 
1077    if (info->force_bo_stride) {
1078       if (info->force_bo_stride % align_w || info->force_bo_stride < stride)
1079          return false;
1080 
1081       img->bo_stride = info->force_bo_stride;
1082    } else {
1083       img->bo_stride = align(stride, align_w);
1084    }
1085 
1086    img->bo_height = align(height, align_h);
1087 
1088    return true;
1089 }
1090 
1091 static bool
image_set_gen6_hiz(struct ilo_image * img,const struct ilo_dev * dev,const struct ilo_image_info * info,const struct ilo_image_layout * layout)1092 image_set_gen6_hiz(struct ilo_image *img,
1093                    const struct ilo_dev *dev,
1094                    const struct ilo_image_info *info,
1095                    const struct ilo_image_layout *layout)
1096 {
1097    const int hz_align_j = 8;
1098    enum ilo_image_walk_type hz_walk;
1099    int hz_width, hz_height;
1100    int hz_clear_w, hz_clear_h;
1101    uint8_t lv;
1102 
1103    ILO_DEV_ASSERT(dev, 6, 8);
1104 
1105    assert(layout->aux == ILO_IMAGE_AUX_HIZ);
1106 
1107    assert(layout->walk == ILO_IMAGE_WALK_LAYER ||
1108           layout->walk == ILO_IMAGE_WALK_3D);
1109 
1110    /*
1111     * From the Sandy Bridge PRM, volume 2 part 1, page 312:
1112     *
1113     *     "The hierarchical depth buffer does not support the LOD field, it is
1114     *      assumed by hardware to be zero. A separate hierarachical depth
1115     *      buffer is required for each LOD used, and the corresponding
1116     *      buffer's state delivered to hardware each time a new depth buffer
1117     *      state with modified LOD is delivered."
1118     *
1119     * We will put all LODs in a single bo with ILO_IMAGE_WALK_LOD.
1120     */
1121    if (ilo_dev_gen(dev) >= ILO_GEN(7))
1122       hz_walk = layout->walk;
1123    else
1124       hz_walk = ILO_IMAGE_WALK_LOD;
1125 
1126    /*
1127     * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1128     * PRM, volume 2 part 1, page 312-313.
1129     *
1130     * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1131     * memory row.
1132     */
1133    switch (hz_walk) {
1134    case ILO_IMAGE_WALK_LAYER:
1135       {
1136          const int h0 = align(layout->walk_layer_h0, hz_align_j);
1137          const int h1 = align(layout->walk_layer_h1, hz_align_j);
1138          const int htail =
1139             ((ilo_dev_gen(dev) >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1140          const int hz_qpitch = h0 + h1 + htail;
1141 
1142          hz_width = align(layout->lods[0].slice_width, 16);
1143 
1144          hz_height = hz_qpitch * info->array_size / 2;
1145          if (ilo_dev_gen(dev) >= ILO_GEN(7))
1146             hz_height = align(hz_height, 8);
1147 
1148          img->aux.walk_layer_height = hz_qpitch;
1149       }
1150       break;
1151    case ILO_IMAGE_WALK_LOD:
1152       {
1153          int lod_tx[ILO_IMAGE_MAX_LEVEL_COUNT];
1154          int lod_ty[ILO_IMAGE_MAX_LEVEL_COUNT];
1155          int cur_tx, cur_ty;
1156 
1157          /* figure out the tile offsets of LODs */
1158          hz_width = 0;
1159          hz_height = 0;
1160          cur_tx = 0;
1161          cur_ty = 0;
1162          for (lv = 0; lv < info->level_count; lv++) {
1163             int tw, th;
1164 
1165             lod_tx[lv] = cur_tx;
1166             lod_ty[lv] = cur_ty;
1167 
1168             tw = align(layout->lods[lv].slice_width, 16);
1169             th = align(layout->lods[lv].slice_height, hz_align_j) *
1170                info->array_size / 2;
1171             /* convert to Y-tiles */
1172             tw = (tw + 127) / 128;
1173             th = (th + 31) / 32;
1174 
1175             if (hz_width < cur_tx + tw)
1176                hz_width = cur_tx + tw;
1177             if (hz_height < cur_ty + th)
1178                hz_height = cur_ty + th;
1179 
1180             if (lv == 1)
1181                cur_tx += tw;
1182             else
1183                cur_ty += th;
1184          }
1185 
1186          /* convert tile offsets to memory offsets */
1187          for (lv = 0; lv < info->level_count; lv++) {
1188             img->aux.walk_lod_offsets[lv] =
1189                (lod_ty[lv] * hz_width + lod_tx[lv]) * 4096;
1190          }
1191 
1192          hz_width *= 128;
1193          hz_height *= 32;
1194       }
1195       break;
1196    case ILO_IMAGE_WALK_3D:
1197       hz_width = align(layout->lods[0].slice_width, 16);
1198 
1199       hz_height = 0;
1200       for (lv = 0; lv < info->level_count; lv++) {
1201          const int h = align(layout->lods[lv].slice_height, hz_align_j);
1202          /* according to the formula, slices are packed together vertically */
1203          hz_height += h * u_minify(info->depth, lv);
1204       }
1205       hz_height /= 2;
1206       break;
1207    default:
1208       assert(!"unknown HiZ walk");
1209       hz_width = 0;
1210       hz_height = 0;
1211       break;
1212    }
1213 
1214    /*
1215     * In hiz_align_fb(), we will align the LODs to 8x4 sample blocks.
1216     * Experiments on Haswell show that aligning the RECTLIST primitive and
1217     * 3DSTATE_DRAWING_RECTANGLE alone are not enough.  The LOD sizes must be
1218     * aligned.
1219     */
1220    hz_clear_w = 8;
1221    hz_clear_h = 4;
1222    switch (info->sample_count) {
1223    case 1:
1224    default:
1225       break;
1226    case 2:
1227       hz_clear_w /= 2;
1228       break;
1229    case 4:
1230       hz_clear_w /= 2;
1231       hz_clear_h /= 2;
1232       break;
1233    case 8:
1234       hz_clear_w /= 4;
1235       hz_clear_h /= 2;
1236       break;
1237    case 16:
1238       hz_clear_w /= 4;
1239       hz_clear_h /= 4;
1240       break;
1241    }
1242 
1243    for (lv = 0; lv < info->level_count; lv++) {
1244       if (u_minify(info->width, lv) % hz_clear_w ||
1245           u_minify(info->height, lv) % hz_clear_h)
1246          break;
1247       img->aux.enables |= 1 << lv;
1248    }
1249 
1250    /* we padded to allow this in image_get_gen6_monolithic_size() */
1251    if (info->level_count == 1 && info->array_size == 1 && info->depth == 1)
1252       img->aux.enables |= 0x1;
1253 
1254    /* align to Y-tile */
1255    img->aux.bo_stride = align(hz_width, 128);
1256    img->aux.bo_height = align(hz_height, 32);
1257 
1258    return true;
1259 }
1260 
1261 static bool
image_set_gen7_mcs(struct ilo_image * img,const struct ilo_dev * dev,const struct ilo_image_info * info,const struct ilo_image_layout * layout)1262 image_set_gen7_mcs(struct ilo_image *img,
1263                    const struct ilo_dev *dev,
1264                    const struct ilo_image_info *info,
1265                    const struct ilo_image_layout *layout)
1266 {
1267    int mcs_width, mcs_height, mcs_cpp;
1268    int downscale_x, downscale_y;
1269 
1270    ILO_DEV_ASSERT(dev, 7, 8);
1271 
1272    assert(layout->aux == ILO_IMAGE_AUX_MCS);
1273 
1274    if (info->sample_count > 1) {
1275       /*
1276        * From the Ivy Bridge PRM, volume 2 part 1, page 326, the clear
1277        * rectangle is scaled down by 8x2 for 4X MSAA and 2x2 for 8X MSAA.  The
1278        * need of scale down could be that the clear rectangle is used to clear
1279        * the MCS instead of the RT.
1280        *
1281        * For 8X MSAA, we need 32 bits in MCS for every pixel in the RT.  The
1282        * 2x2 factor could come from that the hardware writes 128 bits (an
1283        * OWord) at a time, and the OWord in MCS maps to a 2x2 pixel block in
1284        * the RT.  For 4X MSAA, we need 8 bits in MCS for every pixel in the
1285        * RT.  Similarly, we could reason that an OWord in 4X MCS maps to a 8x2
1286        * pixel block in the RT.
1287        */
1288       switch (info->sample_count) {
1289       case 2:
1290       case 4:
1291          downscale_x = 8;
1292          downscale_y = 2;
1293          mcs_cpp = 1;
1294          break;
1295       case 8:
1296          downscale_x = 2;
1297          downscale_y = 2;
1298          mcs_cpp = 4;
1299          break;
1300       case 16:
1301          downscale_x = 2;
1302          downscale_y = 1;
1303          mcs_cpp = 8;
1304          break;
1305       default:
1306          assert(!"unsupported sample count");
1307          return false;
1308          break;
1309       }
1310 
1311       /*
1312        * It also appears that the 2x2 subspans generated by the scaled-down
1313        * clear rectangle cannot be masked.  The scale-down clear rectangle
1314        * thus must be aligned to 2x2, and we need to pad.
1315        */
1316       mcs_width = align(info->width, downscale_x * 2);
1317       mcs_height = align(info->height, downscale_y * 2);
1318    } else {
1319       /*
1320        * From the Ivy Bridge PRM, volume 2 part 1, page 327:
1321        *
1322        *     "              Pixels  Lines
1323        *      TiledY RT CL
1324        *          bpp
1325        *          32          8        4
1326        *          64          4        4
1327        *          128         2        4
1328        *
1329        *      TiledX RT CL
1330        *          bpp
1331        *          32          16       2
1332        *          64          8        2
1333        *          128         4        2"
1334        *
1335        * This table and the two following tables define the RT alignments, the
1336        * clear rectangle alignments, and the clear rectangle scale factors.
1337        * Viewing the RT alignments as the sizes of 128-byte blocks, we can see
1338        * that the clear rectangle alignments are 16x32 blocks, and the clear
1339        * rectangle scale factors are 8x16 blocks.
1340        *
1341        * For non-MSAA RT, we need 1 bit in MCS for every 128-byte block in the
1342        * RT.  Similar to the MSAA cases, we can argue that an OWord maps to
1343        * 8x16 blocks.
1344        *
1345        * One problem with this reasoning is that a Y-tile in MCS has 8x32
1346        * OWords and maps to 64x512 128-byte blocks.  This differs from i965,
1347        * which says that a Y-tile maps to 128x256 blocks (\see
1348        * intel_get_non_msrt_mcs_alignment).  It does not really change
1349        * anything except for the size of the allocated MCS.  Let's see if we
1350        * hit out-of-bound access.
1351        */
1352       switch (layout->tiling) {
1353       case GEN6_TILING_X:
1354          downscale_x = 64 / info->block_size;
1355          downscale_y = 2;
1356          break;
1357       case GEN6_TILING_Y:
1358          downscale_x = 32 / info->block_size;
1359          downscale_y = 4;
1360          break;
1361       default:
1362          assert(!"unsupported tiling mode");
1363          return false;
1364          break;
1365       }
1366 
1367       downscale_x *= 8;
1368       downscale_y *= 16;
1369 
1370       /*
1371        * From the Haswell PRM, volume 7, page 652:
1372        *
1373        *     "Clear rectangle must be aligned to two times the number of
1374        *      pixels in the table shown below due to 16X16 hashing across the
1375        *      slice."
1376        *
1377        * The scaled-down clear rectangle must be aligned to 4x4 instead of
1378        * 2x2, and we need to pad.
1379        */
1380       mcs_width = align(info->width, downscale_x * 4) / downscale_x;
1381       mcs_height = align(info->height, downscale_y * 4) / downscale_y;
1382       mcs_cpp = 16; /* an OWord */
1383    }
1384 
1385    img->aux.enables = (1 << info->level_count) - 1;
1386    /* align to Y-tile */
1387    img->aux.bo_stride = align(mcs_width * mcs_cpp, 128);
1388    img->aux.bo_height = align(mcs_height, 32);
1389 
1390    return true;
1391 }
1392 
1393 bool
ilo_image_init(struct ilo_image * img,const struct ilo_dev * dev,const struct ilo_image_info * info)1394 ilo_image_init(struct ilo_image *img,
1395                const struct ilo_dev *dev,
1396                const struct ilo_image_info *info)
1397 {
1398    struct ilo_image_layout layout;
1399 
1400    assert(ilo_is_zeroed(img, sizeof(*img)));
1401 
1402    memset(&layout, 0, sizeof(layout));
1403    layout.lods = img->lods;
1404 
1405    if (!image_get_gen6_layout(dev, info, &layout))
1406       return false;
1407 
1408    img->type = info->type;
1409 
1410    img->format = info->format;
1411    img->block_width = info->block_width;
1412    img->block_height = info->block_height;
1413    img->block_size = info->block_size;
1414 
1415    img->width0 = info->width;
1416    img->height0 = info->height;
1417    img->depth0 = info->depth;
1418    img->array_size = info->array_size;
1419    img->level_count = info->level_count;
1420    img->sample_count = info->sample_count;
1421 
1422    img->walk = layout.walk;
1423    img->interleaved_samples = layout.interleaved_samples;
1424 
1425    img->tiling = layout.tiling;
1426 
1427    img->aux.type = layout.aux;
1428 
1429    img->align_i = layout.align_i;
1430    img->align_j = layout.align_j;
1431 
1432    img->walk_layer_height = layout.walk_layer_height;
1433 
1434    if (!image_set_gen6_bo_size(img, dev, info, &layout))
1435       return false;
1436 
1437    img->scanout = info->bind_scanout;
1438 
1439    switch (layout.aux) {
1440    case ILO_IMAGE_AUX_HIZ:
1441       image_set_gen6_hiz(img, dev, info, &layout);
1442       break;
1443    case ILO_IMAGE_AUX_MCS:
1444       image_set_gen7_mcs(img, dev, info, &layout);
1445       break;
1446    default:
1447       break;
1448    }
1449 
1450    return true;
1451 }
1452