• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #ifndef FREEDRENO_DEVICE_INFO_H
26 #define FREEDRENO_DEVICE_INFO_H
27 
28 #include <assert.h>
29 #include <stdbool.h>
30 #include <stdint.h>
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
/**
 * Freedreno hardware description and quirks
 */

struct fd_dev_info {
   /* GPU generation; this is the value fd_dev_gen() returns. */
   uint8_t chip;

   /* alignment for size of tiles */
   uint32_t tile_align_w;
   uint32_t tile_align_h;
   /* gmem load/store granularity */
   uint32_t gmem_align_w;
   uint32_t gmem_align_h;
   /* max tile size */
   uint32_t tile_max_w;
   uint32_t tile_max_h;

   uint32_t num_vsc_pipes;

   uint32_t cs_shared_mem_size;

   int wave_granularity;

   /* Information for private memory calculations */
   uint32_t fibers_per_sp;

   /* The number of CCUs is always equal to the number of SPs, so both
    * names alias the same storage.
    */
   union {
      uint32_t num_sp_cores;
      uint32_t num_ccu;
   };

   struct {
      uint32_t reg_size_vec4;

      /* The size (in instrlen units (128 bytes)) of the instruction cache
       * where we preload a shader. Loading more than this could trigger a
       * hang on gen3 and later.
       */
      uint32_t instr_cache_size;

      bool has_hw_multiview;

      bool has_fs_tex_prefetch;

      /* Whether the PC_MULTIVIEW_MASK register exists. */
      bool supports_multiview_mask;

      /* info for setting RB_CCU_CNTL */
      bool concurrent_resolve;
      bool has_z24uint_s8uint;

      bool tess_use_shared;

      /* Does the hw support GL_QCOM_shading_rate? */
      bool has_shading_rate;

      /* Newer a6xx allows using a 16-bit descriptor for both 16-bit
       * and 32-bit access.
       */
      bool storage_16bit;

      /* The latest known a630_sqe.fw fails to wait for WFI before
       * reading the indirect buffer when using CP_DRAW_INDIRECT_MULTI,
       * so we have to fall back to CP_WAIT_FOR_ME except for a650
       * which has a fixed firmware.
       *
       * TODO: There may be a newer a630_sqe.fw released in the future
       * which fixes this; if so we should detect it and avoid this
       * workaround.  Once we have uapi to query the fw version, we can
       * replace this with a minimum fw version.
       */
      bool indirect_draw_wfm_quirk;

      /* On some GPUs, the depth test needs to be enabled when the
       * depth bounds test is enabled and the depth attachment uses UBWC.
       */
      bool depth_bounds_require_depth_test_quirk;

      bool has_tex_filter_cubic;
      bool has_separate_chroma_filter;

      bool has_sample_locations;

      /* The firmware on newer a6xx drops CP_REG_WRITE support as we
       * can now use direct register writes for these regs.
       */
      bool has_cp_reg_write;

      bool has_8bpp_ubwc;

      bool has_lpac;

      bool has_getfiberid;

      bool has_dp2acc;
      bool has_dp4acc;

      /* LRZ fast-clear works on all gens, however the blob disables it on
       * gen1 and gen2. We also elect to disable fast-clear on these gens
       * because for close to no gain it adds complexity and seems to work
       * a bit differently from gen3+, which creates at least one edge case:
       * if the first draw which uses LRZ fast-clear doesn't lock the LRZ
       * direction, the fast-clear value is undefined. For details see
       * https://gitlab.freedesktop.org/mesa/mesa/-/issues/6829
       */
      bool enable_lrz_fast_clear;
      bool has_lrz_dir_tracking;
      bool lrz_track_quirk;

      /* Some generations have a bit to add the multiview index to the
       * viewport index, which lets us implement different scaling for
       * different views.
       */
      bool has_per_view_viewport;
      bool has_gmem_fast_clear;

      /* Per CCU GMEM amount reserved for each of DEPTH and COLOR caches
       * in sysmem rendering.
       */
      uint32_t sysmem_per_ccu_depth_cache_size;
      uint32_t sysmem_per_ccu_color_cache_size;
      /* Per CCU GMEM amount reserved for color cache used by GMEM resolves
       * which require color cache (non-BLIT event case).
       * The size is expressed as a fraction of the ccu cache used by sysmem
       * rendering. If a GMEM resolve requires color cache, the driver needs
       * to make sure it will not overwrite pixel data in GMEM that is still
       * needed.
       */
      /* see enum a6xx_ccu_cache_size */
      uint32_t gmem_ccu_color_cache_fraction;

      /* Corresponds to HLSQ_CONTROL_1_REG::PRIMALLOCTHRESHOLD */
      uint32_t prim_alloc_threshold;

      uint32_t vs_max_inputs_count;

      bool supports_double_threadsize;

      bool has_sampler_minmax;

      bool broken_ds_ubwc_quirk;

      /* Whether UBWC is supported on all IBOs. Prior to this, only readonly
       * or writeonly IBOs could use UBWC and mixing reads and writes was not
       * permitted.
       */
      bool supports_ibo_ubwc;

      /* One value per named register. */
      struct {
         uint32_t PC_POWER_CNTL;
         uint32_t TPL1_DBG_ECO_CNTL;
         uint32_t GRAS_DBG_ECO_CNTL;
         uint32_t SP_CHICKEN_BITS;
         uint32_t UCHE_CLIENT_PF;
         uint32_t PC_MODE_CNTL;
         uint32_t SP_DBG_ECO_CNTL;
         uint32_t RB_DBG_ECO_CNTL;
         uint32_t RB_DBG_ECO_CNTL_blit;
         uint32_t HLSQ_DBG_ECO_CNTL;
         uint32_t RB_UNKNOWN_8E01;
         uint32_t VPC_DBG_ECO_CNTL;
         uint32_t UCHE_UNKNOWN_0E12;

         uint32_t RB_UNKNOWN_8E06;
      } magic;

      /* Up to 64 raw (register, value) pairs. */
      struct {
         uint32_t reg;
         uint32_t value;
      } magic_raw[64];

      /* maximum number of descriptor sets */
      uint32_t max_sets;
   } a6xx;

   struct {
      /* stsc may need to be done twice for the same range to work around
       * _something_, observed in the blob's disassembly.
       */
      bool stsc_duplication_quirk;

      /* Whether there is CP_EVENT_WRITE7::WRITE_SAMPLE_COUNT */
      bool has_event_write_sample_count;

      /* The blob executes a special compute dispatch at the start of each
       * command buffer. We copy this dispatch as is.
       */
      bool cmdbuf_start_a725_quirk;

      bool load_inline_uniforms_via_preamble_ldgk;
      bool load_shader_consts_via_preamble;

      bool has_gmem_vpc_attr_buf;
      /* Size of buffer in gmem for VPC attributes */
      uint32_t sysmem_vpc_attr_buf_size;
      uint32_t gmem_vpc_attr_buf_size;

      /* Whether the UBWC fast-clear values for snorm, unorm, and int formats
       * are the same. This is the case from a740 onwards. These formats were
       * already otherwise UBWC-compatible, so this means that they are now
       * fully compatible.
       */
      bool ubwc_unorm_snorm_int_compatible;
   } a7xx;
};
238 
/**
 * Identifies a GPU by its (deprecated) gpu-id and/or its chip-id.
 * fd_dev_gpu_id() asserts that at least one of the two is non-zero.
 */
struct fd_dev_id {
   /* Deprecated numeric id. When zero, fd_dev_gpu_id() synthesizes a
    * value from chip_id instead.
    */
   uint32_t gpu_id;
   /* Chip identifier. fd_dev_gpu_id() decodes the bytes at bits 31:24,
    * 23:16 and 15:8.
    */
   uint64_t chip_id;
};
243 
244 /**
245  * Note that gpu-id should be considered deprecated.  For newer a6xx, if
246  * there is no gpu-id, this attempts to generate one from the chip-id.
247  * But that may not work forever, so avoid depending on this for newer
248  * gens
249  */
250 static inline uint32_t
fd_dev_gpu_id(const struct fd_dev_id * id)251 fd_dev_gpu_id(const struct fd_dev_id *id)
252 {
253    assert(id->gpu_id || id->chip_id);
254    if (!id->gpu_id) {
255       return ((id->chip_id >> 24) & 0xff) * 100 +
256              ((id->chip_id >> 16) & 0xff) * 10 +
257              ((id->chip_id >>  8) & 0xff);
258 
259    }
260    return id->gpu_id;
261 }
262 
/* Unmodified dev info as defined in freedreno_devices.py.  Returns a
 * pointer to const data; callers must not modify it.
 */
const struct fd_dev_info *fd_dev_info_raw(const struct fd_dev_id *id);

/* Final dev info with dbg options and everything else applied.  The result
 * is returned by value, so each caller gets its own copy.
 *
 * NOTE(review): the `const` on a by-value return is ignored by the compiler;
 * it is kept because the matching definition is outside this header.
 */
const struct fd_dev_info fd_dev_info(const struct fd_dev_id *id);
268 
269 static uint8_t
fd_dev_gen(const struct fd_dev_id * id)270 fd_dev_gen(const struct fd_dev_id *id)
271 {
272    return fd_dev_info_raw(id)->chip;
273 }
274 
/**
 * Return true when the device generation (see fd_dev_gen()) is 5 or newer.
 *
 * NOTE(review): the name suggests this gates 64-bit support on gen5+ --
 * confirm exactly what "64b" covers against the callers.
 */
static inline bool
fd_dev_64b(const struct fd_dev_id *id)
{
   const uint8_t gen = fd_dev_gen(id);

   return gen >= 5;
}
280 
/* Per CCU GMEM amount (in bytes) reserved for depth cache for direct
 * rendering.
 */
#define A6XX_CCU_DEPTH_SIZE (64 * 1024)
/* Per CCU GMEM amount (in bytes) reserved for color cache used by GMEM
 * resolves which require color cache (non-BLIT event case).
 * This is smaller than what is normally used by direct rendering
 * (the RB_CCU_CNTL.GMEM bit enables this smaller size).
 * If a GMEM resolve requires color cache, the driver needs to make sure
 * it will not overwrite pixel data in GMEM that is still needed.
 */
#define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024)
291 
/* Return a name string for the given device id.
 * NOTE(review): ownership/lifetime of the returned string and the result
 * for unknown ids are not visible from this header -- confirm in the
 * implementation.
 */
const char *fd_dev_name(const struct fd_dev_id *id);

/* Apply dbg options to `info`, modifying it in place. */
void fd_dev_info_apply_dbg_options(struct fd_dev_info *info);
296 
297 #ifdef __cplusplus
298 } /* end of extern "C" */
299 #endif
300 
301 #endif /* FREEDRENO_DEVICE_INFO_H */
302