• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2022 Alyssa Rosenzweig
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "agx_tilebuffer.h"
7 #include <assert.h>
8 #include "compiler/agx_internal_formats.h"
9 #include "util/bitscan.h"
10 #include "util/format/u_format.h"
11 #include "agx_formats.h"
12 #include "agx_usc.h"
13 
/* Upper bound, in bytes, on the tilebuffer footprint of a single tile on
 * G13G. Later revisions of the architecture may use a different limit.
 */
#define MAX_BYTES_PER_TILE ((32 * 1024) - 1)

/* Upper bound, in bytes, on the tilebuffer footprint of a single sample.
 * Render targets that would push the allocation past this limit have to be
 * spilled to memory.
 */
#define MAX_BYTES_PER_SAMPLE 64

/* Smallest tile the architecture supports, in pixels (16x16). */
#define MIN_TILE_SIZE_PX (16 * 16)
26 
27 /* Select the largest tile size that fits */
28 static struct agx_tile_size
agx_select_tile_size(unsigned bytes_per_pixel)29 agx_select_tile_size(unsigned bytes_per_pixel)
30 {
31    /* clang-format off */
32    struct agx_tile_size sizes[] = {
33       { 32, 32 },
34       { 32, 16 },
35       { 16, 16 }
36    };
37    /* clang-format on */
38 
39    for (unsigned i = 0; i < ARRAY_SIZE(sizes); ++i) {
40       struct agx_tile_size size = sizes[i];
41 
42       if ((bytes_per_pixel * size.width * size.height) <= MAX_BYTES_PER_TILE)
43          return size;
44    }
45 
46    unreachable("No supported tile size meets the bytes per pixel requirement");
47 }
48 
49 struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(enum pipe_format * formats,uint8_t nr_cbufs,uint8_t nr_samples,bool layered)50 agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
51                             uint8_t nr_samples, bool layered)
52 {
53    struct agx_tilebuffer_layout tib = {
54       .nr_samples = nr_samples,
55       .layered = layered,
56    };
57 
58    uint32_t offset_B = 0;
59 
60    for (unsigned rt = 0; rt < nr_cbufs; ++rt) {
61       tib.logical_format[rt] = formats[rt];
62 
63       /* Require natural alignment for tilebuffer allocations. This could be
64        * optimized, but this shouldn't be a problem in practice.
65        */
66       enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
67       unsigned align_B = util_format_get_blocksize(physical_fmt);
68       assert(util_is_power_of_two_nonzero(align_B) &&
69              util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
70              align_B < MAX_BYTES_PER_SAMPLE &&
71              "max bytes per sample divisible by alignment");
72 
73       offset_B = ALIGN_POT(offset_B, align_B);
74       assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");
75 
76       /* Determine the size, if we were to allocate this render target to the
77        * tilebuffer as desired.
78        */
79       unsigned nr = util_format_get_nr_components(physical_fmt) == 1
80                        ? util_format_get_nr_components(formats[rt])
81                        : 1;
82 
83       unsigned size_B = align_B * nr;
84       unsigned new_offset_B = offset_B + size_B;
85 
86       /* If allocating this render target would exceed any tilebuffer limits, we
87        * need to spill it to memory. We continue processing in case there are
88        * smaller render targets after that would still fit. Otherwise, we
89        * allocate it to the tilebuffer.
90        *
91        * TODO: Suboptimal, we might be able to reorder render targets to
92        * avoid fragmentation causing spilling.
93        */
94       bool fits = (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
95                   (ALIGN_POT(new_offset_B, 8) * MIN_TILE_SIZE_PX *
96                    nr_samples) <= MAX_BYTES_PER_TILE;
97 
98       if (fits) {
99          tib._offset_B[rt] = offset_B;
100          offset_B = new_offset_B;
101       } else {
102          tib.spilled[rt] = true;
103       }
104    }
105 
106    assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");
107 
108    /* Multisampling needs a nonempty allocation.
109     * XXX: Check this against hw
110     */
111    if (nr_samples > 1)
112       offset_B = MAX2(offset_B, 1);
113 
114    tib.sample_size_B = ALIGN_POT(offset_B, 8);
115 
116    tib.tile_size = agx_select_tile_size(tib.sample_size_B * nr_samples);
117    return tib;
118 }
119 
120 enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout * tib,unsigned rt)121 agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
122 {
123    return agx_pixel_format[tib->logical_format[rt]].renderable;
124 }
125 
126 bool
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout * tib,unsigned rt)127 agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
128 {
129    /* We don't bother support masking with spilled render targets. This might be
130     * optimized in the future but spilling is so rare anyway it's not worth it.
131     */
132    if (tib->spilled[rt])
133       return false;
134 
135    enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
136    return agx_internal_format_supports_mask((enum agx_internal_formats)fmt);
137 }
138 
139 static unsigned
agx_shared_layout_from_tile_size(struct agx_tile_size t)140 agx_shared_layout_from_tile_size(struct agx_tile_size t)
141 {
142    if (t.width == 32 && t.height == 32)
143       return AGX_SHARED_LAYOUT_32X32;
144    else if (t.width == 32 && t.height == 16)
145       return AGX_SHARED_LAYOUT_32X16;
146    else if (t.width == 16 && t.height == 16)
147       return AGX_SHARED_LAYOUT_16X16;
148    else
149       unreachable("Invalid tile size");
150 }
151 
152 uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout * tib)153 agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib)
154 {
155    return tib->sample_size_B * tib->nr_samples * tib->tile_size.width *
156           tib->tile_size.height;
157 }
158 
159 void
agx_usc_tilebuffer(struct agx_usc_builder * b,struct agx_tilebuffer_layout * tib)160 agx_usc_tilebuffer(struct agx_usc_builder *b, struct agx_tilebuffer_layout *tib)
161 {
162    agx_usc_pack(b, SHARED, cfg) {
163       cfg.uses_shared_memory = true;
164       cfg.layout = agx_shared_layout_from_tile_size(tib->tile_size);
165       cfg.sample_stride_in_8_bytes = tib->sample_size_B / 8;
166       cfg.sample_count = tib->nr_samples;
167       cfg.bytes_per_threadgroup = agx_tilebuffer_total_size(tib);
168    }
169 }
170