/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
24 
#ifndef FREEDRENO_DEVICE_INFO_H
#define FREEDRENO_DEVICE_INFO_H

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif
35 
/**
 * Freedreno hardware description and quirks
 */

/*
 * Per-device hardware description: tiling limits, core counts and, inside an
 * anonymous union, generation-specific parameters (only the a6xx variant is
 * defined here).
 */
struct fd_dev_info {
   /* alignment for size of tiles */
   uint32_t tile_align_w, tile_align_h;
   /* gmem load/store granularity */
   uint32_t gmem_align_w, gmem_align_h;
   /* max tile size */
   uint32_t tile_max_w, tile_max_h;

   uint32_t num_vsc_pipes;

   /* The number of CCUs is always equal to the number of SPs, so both
    * names alias the same storage.
    */
   union {
      uint32_t num_sp_cores;
      uint32_t num_ccu;
   };

   union {
      struct {
         /* Information for private memory calculations */
         uint32_t fibers_per_sp;

         uint32_t reg_size_vec4;

         /* The size (in instrlen units (128 bytes)) of the instruction
          * cache where we preload a shader. Loading more than this could
          * trigger a hang on gen3 and later.
          */
         uint32_t instr_cache_size;

         /* Whether the PC_MULTIVIEW_MASK register exists. */
         bool supports_multiview_mask;

         /* info for setting RB_CCU_CNTL */
         bool concurrent_resolve;
         bool has_z24uint_s8uint;

         bool tess_use_shared;

         /* Does the hw support GL_QCOM_shading_rate? */
         bool has_shading_rate;

         /* Newer a6xx allows using a 16-bit descriptor for both 16-bit
          * and 32-bit access.
          */
         bool storage_16bit;

         /* The latest known a630_sqe.fw fails to wait for WFI before
          * reading the indirect buffer when using CP_DRAW_INDIRECT_MULTI,
          * so we have to fall back to CP_WAIT_FOR_ME except for a650
          * which has a fixed firmware.
          *
          * TODO: There may be newer a630_sqe.fw released in the future
          * which fixes this, if so we should detect it and avoid this
          * workaround.  Once we have uapi to query fw version, we can
          * replace this with minimum fw version.
          */
         bool indirect_draw_wfm_quirk;

         /* On some GPUs, the depth test needs to be enabled when the
          * depth bounds test is enabled and the depth attachment uses UBWC.
          */
         bool depth_bounds_require_depth_test_quirk;

         bool has_tex_filter_cubic;

         bool has_sample_locations;

         /* The firmware on newer a6xx drops CP_REG_WRITE support as we
          * can now use direct register writes for these regs.
          */
         bool has_cp_reg_write;

         bool has_8bpp_ubwc;

         /* a650 seems to be affected by a bug where flushing CCU color into
          * depth or vice-versa requires a WFI. In particular, clearing a
          * depth attachment (which writes to it as a color attachment) then
          * using it as a normal depth attachment requires a WFI in addition
          * to the expected CCU_FLUSH_COLOR + CCU_INVALIDATE_DEPTH, even
          * though all those operations happen in the same stage. As this is
          * usually the only scenario where a CCU flush doesn't require a WFI
          * we just insert a WFI after every CCU flush.
          *
          * Tests affected include
          * dEQP-VK.renderpass.suballocation.formats.d16_unorm.* in sysmem
          * mode (a few tests flake when the entire series is run).
          */
         bool has_ccu_flush_bug;

         bool has_lpac;

         bool has_getfiberid;

         bool has_dp2acc;
         bool has_dp4acc;

         /* LRZ fast-clear works on all gens, however the blob disables it
          * on gen1 and gen2. We also elect to disable fast-clear on these
          * gens because it adds complexity for close to no gain and seems
          * to work a bit differently from gen3+. This creates at least one
          * edge case: if the first draw which uses LRZ fast-clear doesn't
          * lock the LRZ direction, the fast-clear value is undefined. For
          * details see
          * https://gitlab.freedesktop.org/mesa/mesa/-/issues/6829
          */
         bool enable_lrz_fast_clear;
         bool has_lrz_dir_tracking;
         bool lrz_track_quirk;

         /* NOTE(review): presumably per-device values programmed into the
          * registers named by each field -- confirm against the users.
          */
         struct {
            uint32_t RB_UNKNOWN_8E04_blit;
            uint32_t PC_POWER_CNTL;
            uint32_t TPL1_DBG_ECO_CNTL;
         } magic;
      } a6xx;
   };
};
156 
/**
 * Device identifier. fd_dev_gpu_id() asserts that at least one of the two
 * fields is non-zero.
 */
struct fd_dev_id {
   uint32_t gpu_id;
   uint64_t chip_id;
};

/**
 * Note that gpu-id should be considered deprecated.  For newer a6xx, if
 * there is no gpu-id, this attempts to generate one from the chip-id.
 * But that may not work forever, so avoid depending on this for newer
 * gens.
 *
 * Returns id->gpu_id when it is non-zero.  Otherwise the value is built
 * from three chip_id bytes: bits 31:24 * 100 + bits 23:16 * 10 + bits 15:8
 * (e.g. chip_id 0x06030500 yields 635).
 */
static inline uint32_t
fd_dev_gpu_id(const struct fd_dev_id *id)
{
   assert(id->gpu_id || id->chip_id);
   if (!id->gpu_id) {
      return ((id->chip_id >> 24) & 0xff) * 100 +
             ((id->chip_id >> 16) & 0xff) * 10 +
             ((id->chip_id >>  8) & 0xff);
   }
   return id->gpu_id;
}

/**
 * Returns the hardware generation, i.e. the gpu-id divided by 100
 * (630 -> 6).
 *
 * Declared `static inline` like the other helpers in this header so that
 * translation units which include the header without calling it do not get
 * an unused-function warning.
 */
static inline uint8_t
fd_dev_gen(const struct fd_dev_id *id)
{
   return fd_dev_gpu_id(id) / 100;
}

/**
 * Returns true when the generation is 5 or newer.
 * NOTE(review): presumably these are the gens using 64-bit GPU addresses --
 * confirm against the callers.
 */
static inline bool
fd_dev_64b(const struct fd_dev_id *id)
{
   return fd_dev_gen(id) >= 5;
}
192 
/* Per-CCU GMEM amount reserved for the depth cache for direct rendering. */
#define A6XX_CCU_DEPTH_SIZE (64 * 1024)
/* Per-CCU GMEM amount reserved for the color cache used by GMEM resolves
 * which require color cache (non-BLIT event case).
 * This is smaller than what is normally used by direct rendering
 * (RB_CCU_CNTL.GMEM bit enables this smaller size).
 * If a GMEM resolve requires color cache, the driver needs to make sure
 * it will not overwrite pixel data in GMEM that is still needed.
 */
#define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024)
203 
/* Look-up entry points implemented outside this header.
 * NOTE(review): presumably fd_dev_info() returns the hardware description
 * matching `id` and fd_dev_name() a printable name for it; the behaviour
 * for unrecognised ids is not visible here -- confirm against the
 * definitions.
 */
const struct fd_dev_info *fd_dev_info(const struct fd_dev_id *id);
const char *fd_dev_name(const struct fd_dev_id *id);
206 
#ifdef __cplusplus
} /* end of extern "C" */
#endif

#endif /* FREEDRENO_DEVICE_INFO_H */