1 /*
2 * Copyright 2022 Alyssa Rosenzweig
3 * SPDX-License-Identifier: MIT
4 */
5
6 #include "agx_tilebuffer.h"
7 #include <assert.h>
8 #include "compiler/agx_internal_formats.h"
9 #include "util/bitscan.h"
10 #include "util/format/u_format.h"
11 #include "agx_formats.h"
12 #include "agx_usc.h"
13
/* Upper bound on the tilebuffer bytes consumed by a single tile on G13G.
 * Future revisions of the architecture may use a different limit.
 */
#define MAX_BYTES_PER_TILE ((32 * 1024) - 1)

/* Upper bound on the tilebuffer bytes consumed by a single sample. Render
 * targets that would push the allocation past this limit have to be spilled
 * to memory instead.
 */
#define MAX_BYTES_PER_SAMPLE 64

/* Architectural minimum tile size, counted in pixels (a 16x16 tile). */
#define MIN_TILE_SIZE_PX (16 * 16)
26
27 /* Select the largest tile size that fits */
28 static struct agx_tile_size
agx_select_tile_size(unsigned bytes_per_pixel)29 agx_select_tile_size(unsigned bytes_per_pixel)
30 {
31 /* clang-format off */
32 struct agx_tile_size sizes[] = {
33 { 32, 32 },
34 { 32, 16 },
35 { 16, 16 }
36 };
37 /* clang-format on */
38
39 for (unsigned i = 0; i < ARRAY_SIZE(sizes); ++i) {
40 struct agx_tile_size size = sizes[i];
41
42 if ((bytes_per_pixel * size.width * size.height) <= MAX_BYTES_PER_TILE)
43 return size;
44 }
45
46 unreachable("No supported tile size meets the bytes per pixel requirement");
47 }
48
49 struct agx_tilebuffer_layout
agx_build_tilebuffer_layout(enum pipe_format * formats,uint8_t nr_cbufs,uint8_t nr_samples,bool layered)50 agx_build_tilebuffer_layout(enum pipe_format *formats, uint8_t nr_cbufs,
51 uint8_t nr_samples, bool layered)
52 {
53 struct agx_tilebuffer_layout tib = {
54 .nr_samples = nr_samples,
55 .layered = layered,
56 };
57
58 uint32_t offset_B = 0;
59
60 for (unsigned rt = 0; rt < nr_cbufs; ++rt) {
61 tib.logical_format[rt] = formats[rt];
62
63 /* Require natural alignment for tilebuffer allocations. This could be
64 * optimized, but this shouldn't be a problem in practice.
65 */
66 enum pipe_format physical_fmt = agx_tilebuffer_physical_format(&tib, rt);
67 unsigned align_B = util_format_get_blocksize(physical_fmt);
68 assert(util_is_power_of_two_nonzero(align_B) &&
69 util_is_power_of_two_nonzero(MAX_BYTES_PER_SAMPLE) &&
70 align_B < MAX_BYTES_PER_SAMPLE &&
71 "max bytes per sample divisible by alignment");
72
73 offset_B = ALIGN_POT(offset_B, align_B);
74 assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant + above");
75
76 /* Determine the size, if we were to allocate this render target to the
77 * tilebuffer as desired.
78 */
79 unsigned nr = util_format_get_nr_components(physical_fmt) == 1
80 ? util_format_get_nr_components(formats[rt])
81 : 1;
82
83 unsigned size_B = align_B * nr;
84 unsigned new_offset_B = offset_B + size_B;
85
86 /* If allocating this render target would exceed any tilebuffer limits, we
87 * need to spill it to memory. We continue processing in case there are
88 * smaller render targets after that would still fit. Otherwise, we
89 * allocate it to the tilebuffer.
90 *
91 * TODO: Suboptimal, we might be able to reorder render targets to
92 * avoid fragmentation causing spilling.
93 */
94 bool fits = (new_offset_B <= MAX_BYTES_PER_SAMPLE) &&
95 (ALIGN_POT(new_offset_B, 8) * MIN_TILE_SIZE_PX *
96 nr_samples) <= MAX_BYTES_PER_TILE;
97
98 if (fits) {
99 tib._offset_B[rt] = offset_B;
100 offset_B = new_offset_B;
101 } else {
102 tib.spilled[rt] = true;
103 }
104 }
105
106 assert(offset_B <= MAX_BYTES_PER_SAMPLE && "loop invariant");
107
108 /* Multisampling needs a nonempty allocation.
109 * XXX: Check this against hw
110 */
111 if (nr_samples > 1)
112 offset_B = MAX2(offset_B, 1);
113
114 tib.sample_size_B = ALIGN_POT(offset_B, 8);
115
116 tib.tile_size = agx_select_tile_size(tib.sample_size_B * nr_samples);
117 return tib;
118 }
119
120 enum pipe_format
agx_tilebuffer_physical_format(struct agx_tilebuffer_layout * tib,unsigned rt)121 agx_tilebuffer_physical_format(struct agx_tilebuffer_layout *tib, unsigned rt)
122 {
123 return agx_pixel_format[tib->logical_format[rt]].renderable;
124 }
125
126 bool
agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout * tib,unsigned rt)127 agx_tilebuffer_supports_mask(struct agx_tilebuffer_layout *tib, unsigned rt)
128 {
129 /* We don't bother support masking with spilled render targets. This might be
130 * optimized in the future but spilling is so rare anyway it's not worth it.
131 */
132 if (tib->spilled[rt])
133 return false;
134
135 enum pipe_format fmt = agx_tilebuffer_physical_format(tib, rt);
136 return agx_internal_format_supports_mask((enum agx_internal_formats)fmt);
137 }
138
139 static unsigned
agx_shared_layout_from_tile_size(struct agx_tile_size t)140 agx_shared_layout_from_tile_size(struct agx_tile_size t)
141 {
142 if (t.width == 32 && t.height == 32)
143 return AGX_SHARED_LAYOUT_32X32;
144 else if (t.width == 32 && t.height == 16)
145 return AGX_SHARED_LAYOUT_32X16;
146 else if (t.width == 16 && t.height == 16)
147 return AGX_SHARED_LAYOUT_16X16;
148 else
149 unreachable("Invalid tile size");
150 }
151
152 uint32_t
agx_tilebuffer_total_size(struct agx_tilebuffer_layout * tib)153 agx_tilebuffer_total_size(struct agx_tilebuffer_layout *tib)
154 {
155 return tib->sample_size_B * tib->nr_samples * tib->tile_size.width *
156 tib->tile_size.height;
157 }
158
159 void
agx_usc_tilebuffer(struct agx_usc_builder * b,struct agx_tilebuffer_layout * tib)160 agx_usc_tilebuffer(struct agx_usc_builder *b, struct agx_tilebuffer_layout *tib)
161 {
162 agx_usc_pack(b, SHARED, cfg) {
163 cfg.uses_shared_memory = true;
164 cfg.layout = agx_shared_layout_from_tile_size(tib->tile_size);
165 cfg.sample_stride_in_8_bytes = tib->sample_size_B / 8;
166 cfg.sample_count = tib->nr_samples;
167 cfg.bytes_per_threadgroup = agx_tilebuffer_total_size(tib);
168 }
169 }
170