• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#
# Copyright © 2021 Google, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
22
23from mako.template import Template
24import sys
25
def max_bitfield_val(high, low, shift):
    """Return an all-ones bitfield value scaled by ``1 << shift``.

    The field width is computed as ``high - low`` bits, i.e. ``high`` is
    treated as an exclusive upper bound.  For example
    ``max_bitfield_val(9, 5, 5)`` is ``0b1111 << 5 == 480``.

    NOTE(review): callers in this file appear to pass register bit
    positions; confirm whether ``high`` is meant to be inclusive before
    changing the width computation, since the generated limits depend on it.
    """
    width = high - low
    all_ones = (1 << width) - 1
    return all_ones << shift
28
class State(object):
    """Container for the device-info structs and the GPU-id lookup table."""

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the index of ``gpu_info`` within ``self.gpu_infos``.

        Raises:
            ValueError: if ``gpu_info`` is not in ``self.gpu_infos``.
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        # The previous code raised an undefined name (`Error`), which
        # surfaced as a NameError instead of the intended message.
        raise ValueError("invalid info")
47
48s = State()
49
def add_gpus(ids, info):
    """Register ``info`` as the device-info struct for every id in ``ids``.

    Each element of ``ids`` becomes a key in the module-level ``s.gpus``
    table; all of them map to the same ``info`` object.
    """
    # Named gpu_id rather than `id` so the builtin is not shadowed.
    for gpu_id in ids:
        s.gpus[gpu_id] = info
53
class GPUId(object):
    """Identifies a GPU by decimal ``gpu_id``, packed ``chip_id`` and name.

    At least one of ``gpu_id`` / ``chip_id`` must be supplied:

    * If ``chip_id`` is omitted it is derived from the decimal digits of
      ``gpu_id`` (e.g. 630 -> core 6, major 3, minor 0) packed as
      ``core << 24 | major << 16 | minor << 8 | 0xff``.
    * If ``gpu_id`` is omitted it is stored as 0.
    * If ``name`` is omitted it defaults to ``"FD<gpu_id>"``, which
      requires a non-zero ``gpu_id``.
    """
    def __init__(self, gpu_id = None, chip_id = None, name=None):
        if chip_id is None:
            assert gpu_id is not None
            # Exact integer digit split (no float round-trip).
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            # Without an explicit name there must be a real gpu_id to
            # build the default "FD%d" name from.
            assert gpu_id != 0
            name = "FD%d" % gpu_id
        self.name = name
73
class Struct(object):
    """A helper class that stringifies itself to a 'C' struct initializer

    Every instance attribute is emitted, in attribute-creation order, as a
    designated initializer ``.name=value,`` with the value rendered via
    ``str()``; nested ``Struct`` attributes therefore nest naturally.
    """
    def __str__(self):
        # Built with join (and without a local called `s`, which would
        # shadow the module-level State instance of the same name).
        fields = "".join(
            ".%s=%s," % (name, value) for name, value in vars(self).items()
        )
        return "{" + fields + "}"
82
class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w
       tends to have lower limits, in which case a comment will describe
       the bitfield size/shift

       Constructing an instance also appends it to the module-level
       ``s.gpu_infos`` list.
    """
    def __init__(self, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes):
        # Attribute creation order is the order in which Struct.__str__
        # emits the fields, so keep it stable.
        self.gmem_align_w  = gmem_align_w
        self.gmem_align_h  = gmem_align_h
        self.tile_align_w  = tile_align_w
        self.tile_align_h  = tile_align_h
        self.tile_max_w    = tile_max_w
        self.tile_max_h    = tile_max_h
        self.num_vsc_pipes = num_vsc_pipes

        # Register with the global state so the info struct gets emitted.
        s.gpu_infos.append(self)
104
105
class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       ``template`` is a dict; its ``"magic"`` entry (itself a dict) is
       copied onto ``self.a6xx.magic`` and every other entry is copied
       onto ``self.a6xx``, overriding the defaults set here.
       ``num_sp_cores`` and ``num_ccu`` must be equal.
    """
    def __init__(self, template, num_sp_cores, num_ccu,
                 RB_UNKNOWN_8E04_blit, PC_POWER_CNTL):
        super().__init__(gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = 32, tile_align_h = 32,
                         tile_max_w   = 1024, # max_bitfield_val(5, 0, 5)
                         tile_max_h   = max_bitfield_val(14, 8, 4),
                         num_vsc_pipes = 32)
        assert num_sp_cores == num_ccu

        self.num_sp_cores = num_sp_cores

        # 96 tile alignment seems correlated to 3 CCU
        if num_ccu == 3:
            self.tile_align_w = 96

        self.a6xx = Struct()
        self.a6xx.magic = Struct()

        # Sub-generation "magic" values come first ...
        for name, val in template["magic"].items():
            setattr(self.a6xx.magic, name, val)

        # ... followed by the per-GPU "magic" register values:
        self.a6xx.magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
        self.a6xx.magic.PC_POWER_CNTL = PC_POWER_CNTL

        # Things that earlier gens have and later gens remove, provide
        # defaults here and let them be overridden by sub-gen template:
        self.a6xx.has_cp_reg_write = True
        self.a6xx.has_8bpp_ubwc = True

        for name, val in template.items():
            if name == "magic": # handled above
                continue
            setattr(self.a6xx, name, val)
145
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus([
        GPUId(200),
        GPUId(201),
        GPUId(205),
        GPUId(220),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 512,
        tile_max_h   = ~0, # TODO
        num_vsc_pipes = 8,
    ))

# a3xx:
add_gpus([
        GPUId(305),
        GPUId(307),
        GPUId(320),
        GPUId(330),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 992, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
    ))

# a4xx:
add_gpus([
        GPUId(405),
        GPUId(420),
        GPUId(430),
    ], GPUInfo(
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
    ))

# a5xx:
add_gpus([
        GPUId(508),
        GPUId(509),
        GPUId(510),
        GPUId(512),
        GPUId(530),
        GPUId(540),
    ], GPUInfo(
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
    ))
200
# a6xx can be divided into distinct sub-generations, where certain device-
# info parameters are keyed to the sub-generation.  These templates reduce
# the copypaste

# a615, a618, a630:
a6xx_gen1 = dict(
        fibers_per_sp = 128 * 16,
        reg_size_vec4 = 96,
        ccu_cntl_gmem_unk2 = True,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True,
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0x100000,
        )
    )
216
# a640, a680:
a6xx_gen2 = dict(
        fibers_per_sp = 128 * 4 * 16,
        reg_size_vec4 = 96,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True, # TODO: check if true
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0,
        ),
    )
229
# a650:
a6xx_gen3 = dict(
        fibers_per_sp = 128 * 2 * 16,
        reg_size_vec4 = 64,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_sample_locations = True,
        has_ccu_flush_bug = True,
        has_8bpp_ubwc = False,
        magic = dict(
            # this seems to be a chicken bit that fixes cubic filtering:
            TPL1_DBG_ECO_CNTL = 0x1000000,
        ),
    )
247
# a635, a660:
a6xx_gen4 = dict(
        fibers_per_sp = 128 * 2 * 16,
        reg_size_vec4 = 64,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_sample_locations = True,
        has_cp_reg_write = False,
        has_8bpp_ubwc = False,
        has_lpac = True,
        has_shading_rate = True,
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0x5008000,
        ),
    )
266
# Per-GPU a6xx entries: each picks a sub-generation template and supplies
# the core counts and per-GPU "magic" register values.
add_gpus([
        GPUId(615),
        GPUId(618),
    ], A6xxGPUInfo(
        a6xx_gen1,
        num_sp_cores = 1,
        num_ccu = 1,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 0,
    ))

add_gpus([
        GPUId(630),
    ], A6xxGPUInfo(
        a6xx_gen1,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x01000000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(640),
    ], A6xxGPUInfo(
        a6xx_gen2,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(680),
    ], A6xxGPUInfo(
        a6xx_gen2,
        num_sp_cores = 4,
        num_ccu = 4,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 3,
    ))

add_gpus([
        GPUId(650),
    ], A6xxGPUInfo(
        a6xx_gen3,
        num_sp_cores = 3,
        num_ccu = 3,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 2,
    ))

add_gpus([
        GPUId(chip_id=0x06030500, name="Adreno 7c Gen 3"),
    ], A6xxGPUInfo(
        a6xx_gen4,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(660),
    ], A6xxGPUInfo(
        a6xx_gen4,
        num_sp_cores = 3,
        num_ccu = 3,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 2,
    ))
337
# Mako template for the generated C source: one fd_dev_info struct per
# entry in s.gpu_infos, then a table mapping each registered GPU id to
# the struct it shares.
template = """\
/* Copyright (C) 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "freedreno_dev_info.h"

/* Map python to C: */
#define True true
#define False false

%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor

static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};
"""

# Render the tables accumulated in `s` and write the C source to stdout.
print(Template(template).render(s=s))
379
380