/*
 * Copyright 2022 Alyssa Rosenzweig
 * SPDX-License-Identifier: MIT
 */

#include "agx_tilebuffer.h"
#include <assert.h>
#include "util/bitscan.h"
#include "util/format/u_format.h"
#include "agx_usc.h"
#include "layout.h"

/* Maximum number of bytes per tile on G13G. This may change in future versions
 * of the architecture.
 */
#define MAX_BYTES_PER_TILE (32768 - 1)

/* Maximum bytes per sample in the tilebuffer. Greater allocations require
 * spilling render targets to memory.
 */
#define MAX_BYTES_PER_SAMPLE (64)

/* Minimum tile size in pixels, architectural. */
#define MIN_TILE_SIZE_PX (16 * 16)

/* Select the largest tile size that fits */
static struct agx_tile_size
agx_select_tile_size(unsigned bytes_per_pixel)
{
   /* clang-format off */
   struct agx_tile_size sizes[] = {
      { 32, 32 },
      { 32, 16 },
      { 16, 16 }
   };
   /* clang-format on */

   for (unsigned i = 0; i < ARRAY_SIZE(sizes); ++i) {
      struct agx_tile_size size = sizes[i];

      if ((bytes_per_pixel * size.width * size.height) <= MAX_BYTES_PER_TILE)
         return size;
   }

   unreachable("No supported tile size meets the bytes per pixel requirement");
}
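
/* For example, at 8 bytes per pixel a 32x32 tile needs 8 * 32 * 32 = 8192
 * bytes and is selected. At 32 bytes per pixel, 32x32 would need 32768 bytes,
 * exceeding MAX_BYTES_PER_TILE (32767), so 32x16 (16384 bytes) is chosen
 * instead.
 */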

static unsigned
agx_shared_layout_from_tile_size(struct agx_tile_size t)
{
   if (t.width == 32 && t.height == 32)
      return AGX_SHARED_LAYOUT_32X32;
   else if (t.width == 32 && t.height == 16)
      return AGX_SHARED_LAYOUT_32X16;
   else if (t.width == 16 && t.height == 16)
      return AGX_SHARED_LAYOUT_16X16;
   else
      unreachable("Invalid tile size");
}

struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(const enum pipe_format *formats, uint8_t nr_cbufs,
                            uint8_t nr_samples, bool layered)
{
   struct agx_tilebuffer_layout tib = {
      .nr_samples = nr_samples,
      .layered = layered,
   };

   uint32_t offset_B = 0;

   for (unsigned rt = 0; rt < nr_cbufs; ++rt) {
      tib.logical_format[rt] = formats[rt];

      /* If there are gaps in the layout, don't allocate holes. Subtle:
       * PIPE_FORMAT_NONE has a blocksize of 1, not 0, so it must be skipped
       * explicitly rather than allocated as a dummy byte.
       */
      if (formats[rt] == PIPE_FORMAT_NONE)
         continue;

      /* Require natural alignment for tilebuffer allocations. This could be
       * optimized, but it shouldn't be a problem in practice.
       */
      enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
      unsigned align_B = util_format_get_blocksize(physical_fmt);
      assert(util_is_power_of_two_nonzero(align_B) &&
             util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
             align_B < MAX_BYTES_PER_SAMPLE &&
             "max bytes per sample divisible by alignment");

      offset_B = ALIGN_POT(offset_B, align_B);
      assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");

      /* Determine the size, if we were to allocate this render target to the
       * tilebuffer as desired. If the physical format has a single channel,
       * it is replicated once per logical channel of the original format;
       * otherwise its blocksize already covers the whole pixel.
       */
      unsigned nr = util_format_get_nr_components(physical_fmt) == 1
                       ? util_format_get_nr_components(formats[rt])
                       : 1;

      unsigned size_B = align_B * nr;
      unsigned new_offset_B = offset_B + size_B;

      /* If allocating this render target would exceed any tilebuffer limits,
       * we need to spill it to memory. We continue processing, in case smaller
       * render targets after this one would still fit. Otherwise, we allocate
       * the render target to the tilebuffer.
       *
       * TODO: Suboptimal, we might be able to reorder render targets to
       * avoid fragmentation causing spilling.
       */
      bool fits = (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
                  (ALIGN_POT(new_offset_B, 8) * MIN_TILE_SIZE_PX *
                   nr_samples) <= MAX_BYTES_PER_TILE;
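      /* E.g. at 4 samples, a render target that pushes new_offset_B to 32
       * bytes fails the second check (ALIGN_POT(32, 8) * 256 * 4 = 32768 >
       * MAX_BYTES_PER_TILE) and spills, even though it is within
       * MAX_BYTES_PER_SAMPLE.
       */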

      if (fits) {
         tib._offset_B[rt] = offset_B;
         offset_B = new_offset_B;
      } else {
         tib.spilled[rt] = true;
      }
   }

   assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");

   /* Multisampling needs a nonempty allocation.
    * XXX: Check this against hw
    */
   if (nr_samples > 1)
      offset_B = MAX2(offset_B, 1);

   tib.sample_size_B = ALIGN_POT(offset_B, 8);

   tib.tile_size = agx_select_tile_size(tib.sample_size_B * nr_samples);

   agx_tilebuffer_pack_usc(&tib);
   return tib;
}
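
/* Worked example: with two render targets whose physical formats each occupy
 * 8 bytes per sample (hypothetical, e.g. 16-bit RGBA), offset_B ends at 16,
 * so sample_size_B = 16. At 4 samples that is 64 bytes per pixel, for which
 * agx_select_tile_size falls back to a 16x16 tile (64 * 256 = 16384 bytes,
 * within MAX_BYTES_PER_TILE).
 */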

/*
 * With attachmentless rendering in Vulkan, the sample count may not be known
 * until draw time. It's convenient to construct an agx_tilebuffer_layout
 * anyway when beginning rendering and to update the sample count later. This
 * helper allows the driver to set the sample count in a partial
 * agx_tilebuffer_layout.
 *
 * When doing so, we need to rebuild entirely, since e.g. the tile size might
 * change.
 */
void
agx_tilebuffer_set_samples(struct agx_tilebuffer_layout *tib,
                           unsigned nr_samples)
{
   assert(tib->nr_samples == 0 && "must not be initialized");

   *tib = agx_build_tilebuffer_layout(tib->logical_format,
                                      ARRAY_SIZE(tib->logical_format),
                                      nr_samples, tib->layered);
}
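
/* Hypothetical driver flow: fill in logical_format[] and layered with
 * nr_samples left at 0 when rendering begins, then call
 * agx_tilebuffer_set_samples at first draw once the count is known.
 */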

enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
{
   return ail_pixel_format[tib->logical_format[rt]].renderable;
}

bool
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
{
   /* We don't bother supporting masking with spilled render targets. This
    * might be optimized in the future, but spilling is rare enough that it
    * isn't worth it.
    */
   if (tib->spilled[rt])
      return false;

   enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
   return ail_isa_format_supports_mask((enum ail_isa_format)fmt);
}

uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib)
{
   return tib->sample_size_B * tib->nr_samples * tib->tile_size.width *
          tib->tile_size.height;
}
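
/* E.g. a 4-byte sample at 4 samples on a 32x32 tile totals
 * 4 * 4 * 32 * 32 = 16384 bytes per tile.
 */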

void
agx_tilebuffer_pack_usc(struct agx_tilebuffer_layout *tib)
{
   agx_pack(&tib->usc, USC_SHARED, cfg) {
      if (tib->nr_samples > 0) {
         cfg.uses_shared_memory = true;
         cfg.layout = agx_shared_layout_from_tile_size(tib->tile_size);
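
         /* sample_size_B is 8-byte aligned by construction, so the stride
          * division below is exact.
          */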
         cfg.sample_stride_in_8_bytes = tib->sample_size_B / 8;
         cfg.sample_count = tib->nr_samples;
         cfg.bytes_per_threadgroup = agx_tilebuffer_total_size(tib);
      } else {
         cfg.layout = AGX_SHARED_LAYOUT_VERTEX_COMPUTE;
         cfg.bytes_per_threadgroup = 65536;
      }
   }
}