• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#
2# Copyright © 2021 Google, Inc.
3#
4# SPDX-License-Identifier: MIT
5
6from mako.template import Template
7import sys
8import argparse
9from enum import Enum
10
def max_bitfield_val(high, low, shift):
    """Return an all-ones field of (high - low) bits, shifted left by `shift`.

    Note that the width is `high - low` (not `high - low + 1`), so
    max_bitfield_val(14, 8, 4) is 63 << 4 == 1008.
    """
    field_width = high - low
    all_ones = (1 << field_width) - 1
    return all_ones << shift
13
14
# The directory that contains the `a6xx` module is supplied on the command
# line (-p/--import-path, mandatory) and is put at the front of sys.path so
# that the star-import below can resolve it.
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--import-path', required=True)
args = parser.parse_args()
sys.path.insert(0, args.import_path)

# Must stay after the sys.path tweak above.
from a6xx import *
21
22
class CHIP(Enum):
    """Adreno GPU generation; each member's value is the generation number."""

    A2XX = 2
    A3XX = 3
    A4XX = 4
    A5XX = 5
    A6XX = 6
    A7XX = 7
30
class CCUColorCacheFraction(Enum):
    """Symbolic names for the `gmem_ccu_color_cache_fraction` property values."""

    FULL = 0
    HALF = 1
    QUARTER = 2
    EIGHTH = 3
36
37
class State(object):
    """Collects the device-info structs and the GPU-id table built by this file."""

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the index of `gpu_info` within `self.gpu_infos`.

        The first entry that compares equal to `gpu_info` wins.

        Raises:
            ValueError: if no entry of `self.gpu_infos` matches.  (The
                previous code raised an `Error` name that this file does not
                define, so the failure surfaced as a NameError instead.)
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        raise ValueError("invalid info")

# Module-wide registry that the table definitions in this file populate.
s = State()
58
def add_gpus(ids, info):
    """Map every GPU id in `ids` to the same device-info struct `info`.

    The mapping is stored in the module-level state's `gpus` table; an id
    that is already present is overwritten.
    """
    for gpu_id in ids:
        s.gpus[gpu_id] = info
62
class GPUId(object):
    """Identifies one GPU by numeric gpu_id, chip_id and display name.

    Args:
        gpu_id: decimal id such as 630.  Required when `chip_id` is omitted;
            stored as 0 when not given.
        chip_id: packed chip id.  When omitted it is derived from the three
            decimal digits of `gpu_id` as
            (core << 24) | (major << 16) | (minor << 8) | 0xff.
        name: display name.  Defaults to "FD<gpu_id>", which requires a
            non-zero `gpu_id`.
    """
    def __init__(self, gpu_id = None, chip_id = None, name=None):
        if chip_id is None:
            assert gpu_id is not None
            # Split e.g. 630 into core=6, major=3, minor=0.  divmod keeps
            # the arithmetic in integers instead of going through float
            # division and truncating back.
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            assert gpu_id != 0
            name = "FD%d" % gpu_id
        self.name = name
82
class Struct(object):
    """A helper class that stringifies itself to a 'C' struct initializer

    Every instance attribute is emitted as ".name=value," in the order the
    attributes were set, wrapped in a pair of braces.
    """
    def __str__(self):
        members = "".join(
            "." + name + "=" + str(value) + ","
            for name, value in vars(self).items()
        )
        return "{" + members + "}"
91
class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w
       tends to have lower limits, in which case a comment will describe
       the bitfield size/shift

       Constructing an instance also appends it to the module-level
       state's list of device-info structs.
    """
    def __init__(self, chip, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes,
                 cs_shared_mem_size, num_sp_cores, wave_granularity, fibers_per_sp,
                 highest_bank_bit=0, ubwc_swizzle=0x7, macrotile_mode=0,
                 threadsize_base=64, max_waves=16):
        # NOTE: the order of these assignments is the order in which the
        # fields appear when the struct is stringified; keep it stable.

        # The generation is stored as its plain number, not the enum member.
        self.chip = chip.value

        # GMEM / tile geometry
        self.gmem_align_w = gmem_align_w
        self.gmem_align_h = gmem_align_h
        self.tile_align_w = tile_align_w
        self.tile_align_h = tile_align_h
        self.tile_max_w = tile_max_w
        self.tile_max_h = tile_max_h
        self.num_vsc_pipes = num_vsc_pipes

        # Shader-core parameters
        self.cs_shared_mem_size = cs_shared_mem_size
        self.num_sp_cores = num_sp_cores
        self.wave_granularity = wave_granularity
        self.fibers_per_sp = fibers_per_sp
        self.threadsize_base = threadsize_base
        self.max_waves = max_waves

        # Memory layout parameters
        self.highest_bank_bit = highest_bank_bit
        self.ubwc_swizzle = ubwc_swizzle
        self.macrotile_mode = macrotile_mode

        # Register this struct in the global list of device infos.
        s.gpu_infos.append(self)
126
127
class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       `template` may be a single props object or a list of them; they are
       applied in order, so a later one overrides properties set by an
       earlier one.
    """
    def __init__(self, chip, template, num_ccu,
                 tile_align_w, tile_align_h, num_vsc_pipes,
                 cs_shared_mem_size, wave_granularity, fibers_per_sp,
                 magic_regs, raw_magic_regs=None, highest_bank_bit=15,
                 ubwc_swizzle=0x6, macrotile_mode=1,
                 threadsize_base=64, max_waves=16):
        if chip != CHIP.A6XX:
            tile_max_w = 1728
            tile_max_h = 1728
        else:
            tile_max_w = 1024 # max_bitfield_val(5, 0, 5)
            tile_max_h = max_bitfield_val(14, 8, 4) # 1008

        super().__init__(
            chip,
            gmem_align_w=16,
            gmem_align_h=4,
            tile_align_w=tile_align_w,
            tile_align_h=tile_align_h,
            tile_max_w=tile_max_w,
            tile_max_h=tile_max_h,
            num_vsc_pipes=num_vsc_pipes,
            cs_shared_mem_size=cs_shared_mem_size,
            # The # of SP cores seems to always match # of CCU
            num_sp_cores=num_ccu,
            wave_granularity=wave_granularity,
            fibers_per_sp=fibers_per_sp,
            highest_bank_bit=highest_bank_bit,
            ubwc_swizzle=ubwc_swizzle,
            macrotile_mode=macrotile_mode,
            threadsize_base=threadsize_base,
            max_waves=max_waves,
        )

        self.num_ccu = num_ccu

        # Per-generation sub-structs that the props templates fill in.
        self.a6xx = Struct()
        self.a7xx = Struct()

        # Named magic registers become fields of a6xx.magic.
        self.a6xx.magic = Struct()
        for reg_name, reg_val in magic_regs.items():
            setattr(self.a6xx.magic, reg_name, reg_val)

        # Optional list of [register, value] pairs; the register is
        # converted to a plain int.  The field is only present when the
        # caller passed a non-empty list.
        if raw_magic_regs:
            self.a6xx.magic_raw = [
                [int(entry[0]), entry[1]] for entry in raw_magic_regs
            ]

        if isinstance(template, list):
            prop_sets = template
        else:
            prop_sets = [template]
        for props in prop_sets:
            props.apply_props(self)

    def __str__(self):
        # Python lists (e.g. magic_raw) print with square brackets; turn
        # them into braces so the text is a valid C initializer.
        text = super().__str__()
        return text.replace('[', '{').replace("]", "}")
182
183
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus(
    [
        GPUId(200),
        GPUId(201),
        GPUId(205),
        GPUId(220),
    ],
    GPUInfo(
        CHIP.A2XX,
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=512,
        tile_max_h=~0,  # TODO
        num_vsc_pipes=8,
        cs_shared_mem_size=0,
        num_sp_cores=0,  # TODO
        wave_granularity=2,
        fibers_per_sp=0,  # TODO
        threadsize_base=8,  # TODO: Confirm this
    ),
)
204
# a3xx: all ids below share one device-info struct.
add_gpus(
    [
        GPUId(305),
        GPUId(307),
        GPUId(320),
        GPUId(330),
        GPUId(chip_id=0x03000512, name="FD305B"),
        GPUId(chip_id=0x03000620, name="FD306A"),
    ],
    GPUInfo(
        CHIP.A3XX,
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=992,  # max_bitfield_val(4, 0, 5)
        tile_max_h=max_bitfield_val(9, 5, 5),
        num_vsc_pipes=8,
        cs_shared_mem_size=32 * 1024,
        num_sp_cores=0,  # TODO
        wave_granularity=2,
        fibers_per_sp=0,  # TODO
        threadsize_base=8,
    ),
)
225
# a4xx: all ids below share one device-info struct.
add_gpus(
    [
        GPUId(405),
        GPUId(420),
        GPUId(430),
    ],
    GPUInfo(
        CHIP.A4XX,
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=1024,  # max_bitfield_val(4, 0, 5)
        tile_max_h=max_bitfield_val(9, 5, 5),
        num_vsc_pipes=8,
        cs_shared_mem_size=32 * 1024,
        num_sp_cores=0,  # TODO
        wave_granularity=2,
        fibers_per_sp=0,  # TODO
        threadsize_base=32,  # TODO: Confirm this
    ),
)
243
# a5xx entries with num_sp_cores = 1.
add_gpus(
    [
        GPUId(505),
        GPUId(506),
        GPUId(508),
        GPUId(509),
    ],
    GPUInfo(
        CHIP.A5XX,
        gmem_align_w=64, gmem_align_h=32,
        tile_align_w=64, tile_align_h=32,
        tile_max_w=1024,  # max_bitfield_val(7, 0, 5)
        tile_max_h=max_bitfield_val(16, 9, 5),
        num_vsc_pipes=16,
        cs_shared_mem_size=32 * 1024,
        num_sp_cores=1,
        wave_granularity=2,
        # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        fibers_per_sp=64 * 16,
        highest_bank_bit=14,
        threadsize_base=32,
    ),
)
263
# a5xx entries with num_sp_cores = 2.
add_gpus(
    [
        GPUId(510),
        GPUId(512),
    ],
    GPUInfo(
        CHIP.A5XX,
        gmem_align_w=64, gmem_align_h=32,
        tile_align_w=64, tile_align_h=32,
        tile_max_w=1024,  # max_bitfield_val(7, 0, 5)
        tile_max_h=max_bitfield_val(16, 9, 5),
        num_vsc_pipes=16,
        cs_shared_mem_size=32 * 1024,
        num_sp_cores=2,
        wave_granularity=2,
        # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        fibers_per_sp=64 * 16,
        highest_bank_bit=14,
        threadsize_base=32,
    ),
)
281
# a5xx entries with num_sp_cores = 4.
add_gpus(
    [
        GPUId(530),
        GPUId(540),
    ],
    GPUInfo(
        CHIP.A5XX,
        gmem_align_w=64, gmem_align_h=32,
        tile_align_w=64, tile_align_h=32,
        tile_max_w=1024,  # max_bitfield_val(7, 0, 5)
        tile_max_h=max_bitfield_val(16, 9, 5),
        num_vsc_pipes=16,
        cs_shared_mem_size=32 * 1024,
        num_sp_cores=4,
        wave_granularity=2,
        # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        fibers_per_sp=64 * 16,
        highest_bank_bit=15,
        threadsize_base=32,
    ),
)
299
300
class A6XXProps(dict):
    """A dict of property name -> value that can be stamped onto a GPU info.

    `unique_props` is shared by the class and all subclasses; it records the
    most recently applied value for every (property name, generation) pair.
    """
    unique_props = dict()

    def apply_gen_props(self, gen, gpu_info):
        """Copy every property onto the `gen` sub-struct of `gpu_info`."""
        for prop_name in self:
            prop_val = self[prop_name]
            setattr(getattr(gpu_info, gen), prop_name, prop_val)
            A6XXProps.unique_props[(prop_name, gen)] = prop_val

    def apply_props(self, gpu_info):
        """Apply the properties to `gpu_info.a6xx`."""
        self.apply_gen_props("a6xx", gpu_info)
310
311
class A7XXProps(A6XXProps):
    """Property set whose values land in `gpu_info.a7xx` instead of `a6xx`."""

    def apply_props(self, gpu_info):
        """Apply the properties to `gpu_info.a7xx`."""
        self.apply_gen_props("a7xx", gpu_info)
315
316
# Props can be overridden with an env var:
#  FD_DEV_FEATURES=%feature_name%=%value%:%feature_name%=%value%:...
# e.g.
#  FD_DEV_FEATURES=has_fs_tex_prefetch=0:max_sets=4
321
# Baseline a6xx properties; the sub-generation templates are applied after
# this one in the device tables and override individual entries.
a6xx_base = A6XXProps(
    has_cp_reg_write=True,
    has_8bpp_ubwc=True,
    has_gmem_fast_clear=True,
    has_hw_multiview=True,
    has_fs_tex_prefetch=True,
    has_sampler_minmax=True,

    supports_double_threadsize=True,

    sysmem_per_ccu_depth_cache_size=64 * 1024,
    sysmem_per_ccu_color_cache_size=64 * 1024,
    gmem_ccu_color_cache_fraction=CCUColorCacheFraction.QUARTER.value,

    prim_alloc_threshold=0x7,
    vs_max_inputs_count=32,
    max_sets=5,
    line_width_min=1.0,
    line_width_max=1.0,
)
342
343
# a6xx and a7xx can be divided into distinct sub-generations, where certain
# device-info parameters are keyed to the sub-generation.  These templates
# reduce copy-paste.
347
# a6xx gen1 "low" template: gen1 values plus a set of overrides that turn
# off or shrink several of the a6xx_base defaults.
a6xx_gen1_low = A6XXProps(
    reg_size_vec4=48,
    instr_cache_size=64,
    indirect_draw_wfm_quirk=True,
    depth_bounds_require_depth_test_quirk=True,

    has_gmem_fast_clear=False,
    has_hw_multiview=False,
    has_sampler_minmax=False,
    has_fs_tex_prefetch=False,
    sysmem_per_ccu_color_cache_size=8 * 1024,
    sysmem_per_ccu_depth_cache_size=8 * 1024,
    gmem_ccu_color_cache_fraction=CCUColorCacheFraction.HALF.value,
    vs_max_inputs_count=16,
    supports_double_threadsize=False,
)
364
# a6xx gen1 template.
a6xx_gen1 = A6XXProps(
    reg_size_vec4=96,
    instr_cache_size=64,
    indirect_draw_wfm_quirk=True,
    depth_bounds_require_depth_test_quirk=True,
)
371
# a6xx gen2 template.
a6xx_gen2 = A6XXProps(
    reg_size_vec4=96,
    instr_cache_size=64,  # TODO
    supports_multiview_mask=True,
    has_z24uint_s8uint=True,
    indirect_draw_wfm_quirk=True,
    depth_bounds_require_depth_test_quirk=True,  # TODO: check if true
    has_dp2acc=False,  # TODO: check if true
    has_8bpp_ubwc=False,
)
382
# a6xx gen3 template.
a6xx_gen3 = A6XXProps(
    reg_size_vec4=64,
    # Blob limits it to 128 but we hang with 128
    instr_cache_size=127,
    supports_multiview_mask=True,
    has_z24uint_s8uint=True,
    tess_use_shared=True,
    storage_16bit=True,
    has_tex_filter_cubic=True,
    has_separate_chroma_filter=True,
    has_sample_locations=True,
    has_8bpp_ubwc=False,
    has_dp2acc=True,
    has_lrz_dir_tracking=True,
    enable_lrz_fast_clear=True,
    lrz_track_quirk=True,
    has_lrz_feedback=True,
    has_per_view_viewport=True,
    has_scalar_alu=True,
    has_early_preamble=True,
    prede_nop_quirk=True,
)
405
# a6xx gen4 template.
a6xx_gen4 = A6XXProps(
    reg_size_vec4=64,
    # Blob limits it to 128 but we hang with 128
    instr_cache_size=127,
    supports_multiview_mask=True,
    has_z24uint_s8uint=True,
    tess_use_shared=True,
    storage_16bit=True,
    has_tex_filter_cubic=True,
    has_separate_chroma_filter=True,
    has_sample_locations=True,
    has_cp_reg_write=False,
    has_8bpp_ubwc=False,
    has_lpac=True,
    has_legacy_pipeline_shading_rate=True,
    has_getfiberid=True,
    has_dp2acc=True,
    has_dp4acc=True,
    enable_lrz_fast_clear=True,
    has_lrz_dir_tracking=True,
    has_lrz_feedback=True,
    has_per_view_viewport=True,
    has_scalar_alu=True,
    has_isam_v=True,
    has_ssbo_imm_offsets=True,
    has_ubwc_linear_mipmap_fallback=True,
    # TODO: there seems to be a quirk where at least rcp can't be in an
    # early preamble. a660 at least is affected.
    #has_early_preamble = True,
    prede_nop_quirk=True,
    predtf_nop_quirk=True,
    has_sad=True,
)
439
# FD605 / FD608 / FD610 / FD612 (a6xx_base + a6xx_gen1_low).
add_gpus(
    [
        GPUId(605),  # TODO: Test it, based only on libwrapfake dumps
        GPUId(608),  # TODO: Test it, based only on libwrapfake dumps
        GPUId(610),
        GPUId(612),  # TODO: Test it, based only on libwrapfake dumps
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen1_low],
        num_ccu=1,
        tile_align_w=32,
        tile_align_h=16,
        num_vsc_pipes=16,
        cs_shared_mem_size=16 * 1024,
        wave_granularity=1,
        fibers_per_sp=128 * 16,
        highest_bank_bit=13,
        ubwc_swizzle=0x7,
        macrotile_mode=0,
        magic_regs=dict(
            PC_POWER_CNTL=0,
            TPL1_DBG_ECO_CNTL=0,
            GRAS_DBG_ECO_CNTL=0,
            SP_CHICKEN_BITS=0,
            UCHE_CLIENT_PF=0x00000004,
            PC_MODE_CNTL=0xf,
            SP_DBG_ECO_CNTL=0x0,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0,
            RB_UNKNOWN_8E01=0x00000001,
            VPC_DBG_ECO_CNTL=0x0,
            UCHE_UNKNOWN_0E12=0x10000000,
        ),
    ),
)
474
# FD615 / FD616 / FD618 / FD619 (a6xx_base + a6xx_gen1).
add_gpus(
    [
        GPUId(615),
        GPUId(616),
        GPUId(618),
        GPUId(619),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen1],
        num_ccu=1,
        tile_align_w=32,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 16,
        highest_bank_bit=14,
        macrotile_mode=0,
        magic_regs=dict(
            PC_POWER_CNTL=0,
            TPL1_DBG_ECO_CNTL=0x00108000,
            GRAS_DBG_ECO_CNTL=0x00000880,
            SP_CHICKEN_BITS=0x00000430,
            UCHE_CLIENT_PF=0x00000004,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x0,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x00080000,
            RB_UNKNOWN_8E01=0x00000001,
            VPC_DBG_ECO_CNTL=0x0,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
508
# FD620 (a6xx_base + a6xx_gen1).
add_gpus(
    [
        GPUId(620),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen1],
        num_ccu=1,
        tile_align_w=32,
        tile_align_h=16,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 16,
        magic_regs=dict(
            PC_POWER_CNTL=0,
            TPL1_DBG_ECO_CNTL=0x01008000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00000400,
            UCHE_CLIENT_PF=0x00000004,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x01000000,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x0,
            VPC_DBG_ECO_CNTL=0x02000000,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
537
# FD621 (a6xx_base + a6xx_gen3, with lrz_track_quirk switched back off).
add_gpus(
    [
        GPUId(chip_id=0xffff06020100, name="FD621"),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen3, A6XXProps(lrz_track_quirk=False)],
        num_ccu=2,
        tile_align_w=96,
        tile_align_h=16,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        magic_regs=dict(
            PC_POWER_CNTL=0,
            # this seems to be a chicken bit that fixes cubic filtering:
            TPL1_DBG_ECO_CNTL=0x01008000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00001400,
            # UCHE_CLIENT_PF = 0x00000004,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x03000000,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x0,
            VPC_DBG_ECO_CNTL=0x02000000,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
567
# FD630 (a6xx_base + a6xx_gen1).
add_gpus(
    [
        GPUId(630),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen1],
        num_ccu=2,
        tile_align_w=32,
        tile_align_h=16,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 16,
        highest_bank_bit=15,
        macrotile_mode=0,
        magic_regs=dict(
            PC_POWER_CNTL=1,
            TPL1_DBG_ECO_CNTL=0x00108000,
            GRAS_DBG_ECO_CNTL=0x00000880,
            SP_CHICKEN_BITS=0x00001430,
            UCHE_CLIENT_PF=0x00000004,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x0,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x05100000,
            HLSQ_DBG_ECO_CNTL=0x00080000,
            RB_UNKNOWN_8E01=0x00000001,
            VPC_DBG_ECO_CNTL=0x0,
            UCHE_UNKNOWN_0E12=0x10000001,
        ),
    ),
)
598
# FD640 (a6xx_base + a6xx_gen2).
add_gpus(
    [
        GPUId(640),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen2],
        num_ccu=2,
        tile_align_w=32,
        tile_align_h=16,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 4 * 16,
        highest_bank_bit=15,
        macrotile_mode=0,
        magic_regs=dict(
            PC_POWER_CNTL=1,
            TPL1_DBG_ECO_CNTL=0x00008000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00000420,
            UCHE_CLIENT_PF=0x00000004,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x0,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x00000001,
            VPC_DBG_ECO_CNTL=0x02000000,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
629
# FD680 (a6xx_base + a6xx_gen2).
add_gpus(
    [
        GPUId(680),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen2],
        num_ccu=4,
        tile_align_w=64,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 4 * 16,
        highest_bank_bit=15,
        macrotile_mode=0,
        magic_regs=dict(
            PC_POWER_CNTL=3,
            TPL1_DBG_ECO_CNTL=0x00108000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00001430,
            UCHE_CLIENT_PF=0x00000004,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x0,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x00000001,
            VPC_DBG_ECO_CNTL=0x02000000,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
660
# FD650 (a6xx_base + a6xx_gen3).
add_gpus(
    [
        GPUId(650),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen3],
        num_ccu=3,
        tile_align_w=96,
        tile_align_h=16,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        highest_bank_bit=16,
        magic_regs=dict(
            PC_POWER_CNTL=2,
            # this seems to be a chicken bit that fixes cubic filtering:
            TPL1_DBG_ECO_CNTL=0x01008000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00001400,
            UCHE_CLIENT_PF=0x00000004,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x01000000,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x0,
            VPC_DBG_ECO_CNTL=0x02000000,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
691
add_gpus(
    [
        # These are all speedbins/variants of A635
        GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"),
        GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"),
        GPUId(chip_id=0x006006030500, name="Adreno 7c+ Gen 3 Lite"),
        GPUId(chip_id=0x00ac06030500, name="FD643"),  # e.g. QCM6490, Fairphone 5
        # fallback wildcard entry should be last:
        GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen4],
        num_ccu=2,
        tile_align_w=32,
        tile_align_h=16,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        highest_bank_bit=14,
        magic_regs=dict(
            PC_POWER_CNTL=1,
            TPL1_DBG_ECO_CNTL=0x05008000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00001400,
            UCHE_CLIENT_PF=0x00000084,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x00000006,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x0,
            VPC_DBG_ECO_CNTL=0x02000000,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
727
# FD660 (a6xx_base + a6xx_gen4).
add_gpus(
    [
        GPUId(660),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen4],
        num_ccu=3,
        tile_align_w=96,
        tile_align_h=16,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        highest_bank_bit=16,
        magic_regs=dict(
            PC_POWER_CNTL=2,
            TPL1_DBG_ECO_CNTL=0x05008000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00001400,
            UCHE_CLIENT_PF=0x00000084,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x01000000,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x0,
            VPC_DBG_ECO_CNTL=0x02000000,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
757
# FD644 / FD663 (a6xx_base + a6xx_gen4).
add_gpus(
    [
        GPUId(chip_id=0x6060201, name="FD644"),  # Called A662 in kgsl
        GPUId(chip_id=0xffff06060300, name="FD663"),
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen4],
        num_ccu=3,
        tile_align_w=96,
        tile_align_h=16,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 4 * 16,
        magic_regs=dict(
            PC_POWER_CNTL=2,
            TPL1_DBG_ECO_CNTL=0x05008000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00001400,
            UCHE_CLIENT_PF=0x00000084,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x6,
            RB_DBG_ECO_CNTL=0x04100000,
            RB_DBG_ECO_CNTL_blit=0x04100000,
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x0,
            VPC_DBG_ECO_CNTL=0x02000000,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
    ),
)
787
# FD690 (a6xx_base + a6xx_gen4, plus broken_ds_ubwc_quirk).
add_gpus(
    [
        GPUId(690),
        GPUId(chip_id=0xffff06090000, name="FD690"),  # Default no-speedbin fallback
    ],
    A6xxGPUInfo(
        CHIP.A6XX,
        [a6xx_base, a6xx_gen4, A6XXProps(broken_ds_ubwc_quirk=True)],
        num_ccu=8,
        tile_align_w=64,
        tile_align_h=32,
        num_vsc_pipes=32,
        cs_shared_mem_size=32 * 1024,
        wave_granularity=2,
        fibers_per_sp=128 * 2 * 16,
        highest_bank_bit=16,
        magic_regs=dict(
            PC_POWER_CNTL=7,
            TPL1_DBG_ECO_CNTL=0x04c00000,
            GRAS_DBG_ECO_CNTL=0x0,
            SP_CHICKEN_BITS=0x00001400,
            UCHE_CLIENT_PF=0x00000084,
            PC_MODE_CNTL=0x1f,
            SP_DBG_ECO_CNTL=0x1200000,
            RB_DBG_ECO_CNTL=0x100000,
            RB_DBG_ECO_CNTL_blit=0x00100000,  # ???
            HLSQ_DBG_ECO_CNTL=0x0,
            RB_UNKNOWN_8E01=0x0,
            VPC_DBG_ECO_CNTL=0x2000400,
            UCHE_UNKNOWN_0E12=0x00000001,
        ),
        raw_magic_regs=[
            [A6XXRegs.REG_A6XX_SP_UNKNOWN_AAF2, 0x00c00000],
        ],
    ),
)
821
# Based on a6xx_base + a6xx_gen4
# (This is an A6XXProps, so applying it fills the `a6xx` sub-struct.)
a7xx_base = A6XXProps(
    has_gmem_fast_clear=True,
    has_hw_multiview=True,
    has_fs_tex_prefetch=True,
    has_sampler_minmax=True,

    supports_double_threadsize=True,

    sysmem_per_ccu_depth_cache_size=256 * 1024,
    sysmem_per_ccu_color_cache_size=64 * 1024,
    gmem_ccu_color_cache_fraction=CCUColorCacheFraction.EIGHTH.value,

    prim_alloc_threshold=0x7,
    vs_max_inputs_count=32,
    max_sets=8,

    reg_size_vec4=96,
    # Blob limits it to 128 but we hang with 128
    instr_cache_size=127,
    supports_multiview_mask=True,
    has_z24uint_s8uint=True,
    tess_use_shared=True,
    storage_16bit=True,
    has_tex_filter_cubic=True,
    has_separate_chroma_filter=True,
    has_sample_locations=True,
    has_lpac=True,
    has_getfiberid=True,
    has_dp2acc=True,
    has_dp4acc=True,
    enable_lrz_fast_clear=True,
    has_lrz_dir_tracking=True,
    has_lrz_feedback=True,
    has_per_view_viewport=True,
    line_width_min=1.0,
    line_width_max=127.5,
    has_scalar_alu=True,
    has_coherent_ubwc_flag_caches=True,
    has_isam_v=True,
    has_ssbo_imm_offsets=True,
    has_early_preamble=True,
    has_attachment_shading_rate=True,
    has_ubwc_linear_mipmap_fallback=True,
    prede_nop_quirk=True,
    predtf_nop_quirk=True,
    has_sad=True,
)
870
# a7xx gen1 template (applied to the `a7xx` sub-struct).
a7xx_gen1 = A7XXProps(
    supports_ibo_ubwc=True,
    fs_must_have_non_zero_constlen_quirk=True,
    enable_tp_ubwc_flag_hint=True,
    reading_shading_rate_requires_smask_quirk=True,
)
877
# a7xx gen2 template (applied to the `a7xx` sub-struct).
a7xx_gen2 = A7XXProps(
    stsc_duplication_quirk=True,
    has_event_write_sample_count=True,
    ubwc_unorm_snorm_int_compatible=True,
    supports_ibo_ubwc=True,
    fs_must_have_non_zero_constlen_quirk=True,
    # Most devices with a740 have blob v6xx which doesn't have
    # this hint set. Match them for better compatibility by default.
    enable_tp_ubwc_flag_hint=False,
    has_64b_ssbo_atomics=True,
    has_primitive_shading_rate=True,
    reading_shading_rate_requires_smask_quirk=True,
)
891
# a7xx gen3 template (applied to the `a7xx` sub-struct).
a7xx_gen3 = A7XXProps(
    has_event_write_sample_count=True,
    load_inline_uniforms_via_preamble_ldgk=True,
    load_shader_consts_via_preamble=True,
    has_gmem_vpc_attr_buf=True,
    sysmem_vpc_attr_buf_size=0x20000,
    gmem_vpc_attr_buf_size=0xc000,
    ubwc_unorm_snorm_int_compatible=True,
    supports_ibo_ubwc=True,
    has_generic_clear=True,
    r8g8_faulty_fast_clear_quirk=True,
    gs_vpc_adjacency_quirk=True,
    storage_8bit=True,
    ubwc_all_formats_compatible=True,
    has_compliant_dp4acc=True,
    ubwc_coherency_quirk=True,
    has_persistent_counter=True,
    has_64b_ssbo_atomics=True,
    has_primitive_shading_rate=True,
)
912
# Register-name -> value table shared by the FD725 and FD730 entries in this
# file, which pass it as magic_regs.
# NOTE(review): how A6xxGPUInfo consumes these keys is defined outside this
# chunk -- confirm before renaming or removing any of them.
913a730_magic_regs = dict(
914        TPL1_DBG_ECO_CNTL = 0x1000000,
915        GRAS_DBG_ECO_CNTL = 0x800,
916        SP_CHICKEN_BITS = 0x1440,
917        UCHE_CLIENT_PF = 0x00000084,
918        PC_MODE_CNTL = 0x0000003f, # 0x00001f1f in some tests
919        SP_DBG_ECO_CNTL = 0x10000000,
920        RB_DBG_ECO_CNTL = 0x00000000,
921        RB_DBG_ECO_CNTL_blit = 0x00000000,  # is it even needed?
922        RB_UNKNOWN_8E01 = 0x0,
923        VPC_DBG_ECO_CNTL = 0x02000000,
924        UCHE_UNKNOWN_0E12 = 0x3200000,
925
926        RB_UNKNOWN_8E06 = 0x02080000,
927    )
928
# List of [register, value] pairs shared by the FD725 and FD730 entries in
# this file, which pass it as raw_magic_regs.
# Registers are referenced through A6XXRegs members (presumably provided by
# the `from a6xx import *` at the top of the file -- confirm).
# The `REG + 1` entries address the offset immediately after the named
# register, assuming A6XXRegs members behave as integer offsets.
929a730_raw_magic_regs = [
930        [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00840004],
931        [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724],
932
933        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00002400],
934        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00000000],
935        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
936        [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
937        [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000040],
938        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00008000],
939        [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x20080000],
940        [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21fc7f00],
941        [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00000000],
942        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
943        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
944        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
945        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
946        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
947        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
948        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
949        [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
950
951        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
952        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
953
954        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2,   0x00000000],
955        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
956        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4,   0x00000000],
957        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
958        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6,   0x00000000],
959        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],
960
961        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],
962
963        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79,   0x00000000],
964        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899,   0x00000000],
965        [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5,   0x00000000],
966    ]
967
# Register-name -> value table passed as magic_regs by the FD740,
# Adreno X1-85 and FDA32 entries in this file.
# The FD735 and FD740v3 entries carry their own inline dicts with the same
# keys; they differ from this table only in a few values.
968a740_magic_regs = dict(
969        # PC_POWER_CNTL = 7,
970        TPL1_DBG_ECO_CNTL = 0x11100000,
971        GRAS_DBG_ECO_CNTL = 0x00004800,
972        SP_CHICKEN_BITS = 0x10001400,
973        UCHE_CLIENT_PF = 0x00000084,
974        # Blob uses 0x1f or 0x1f1f, however these values cause vertex
975        # corruption in some tests.
976        PC_MODE_CNTL = 0x0000003f,
977        SP_DBG_ECO_CNTL = 0x10000000,
978        RB_DBG_ECO_CNTL = 0x00000000,
979        RB_DBG_ECO_CNTL_blit = 0x00000000,  # is it even needed?
980        # HLSQ_DBG_ECO_CNTL = 0x0,
981        RB_UNKNOWN_8E01 = 0x0,
982        VPC_DBG_ECO_CNTL = 0x02000000,
983        UCHE_UNKNOWN_0E12 = 0x00000000,
984
985        RB_UNKNOWN_8E06 = 0x02080000,
986    )
987
# List of [register, value] pairs passed as raw_magic_regs by the FD740,
# Adreno X1-85 and FD740v3 entries in this file.
# Compared with a730_raw_magic_regs it additionally lists
# GRAS_UNKNOWN_8008..800C and RB_UNKNOWN_8C34, and several values differ.
# The `REG + 1` entries address the offset immediately after the named
# register, assuming A6XXRegs members behave as integer offsets.
988a740_raw_magic_regs = [
989        [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004],
990        [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724],
991
992        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
993        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800],
994        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
995        [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
996        [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000],
997        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
998        [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
999        [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600],
1000        [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
1001        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
1002        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
1003        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
1004        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
1005        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
1006        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
1007        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
1008        [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
1009
1010        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
1011        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
1012
1013        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
1014        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
1015        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
1016        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],
1017
1018        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2,   0x00000000],
1019        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
1020        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4,   0x00000000],
1021        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
1022        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6,   0x00000000],
1023        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],
1024
1025        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],
1026
1027        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79,   0x00000000],
1028        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899,   0x00000000],
1029        [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5,   0x00000000],
1030        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34,   0x00000000],
1031
1032        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
1033    ]
1034
# FD725: maps both chip ids below to one shared A6xxGPUInfo (add_gpus stores
# the same info object under every id in the list).
# Identical to the FD730 entry that follows except for the extra
# A7XXProps(cmdbuf_start_a725_quirk = True) appended to the props list.
# NOTE(review): the 0xffff-prefixed chip id is presumably the no-speedbin
# fallback form, as labelled on the FD730 entry -- confirm.
1035add_gpus([
1036        # These are named as Adreno730v3 or Adreno725v1.
1037        GPUId(chip_id=0x07030002, name="FD725"),
1038        GPUId(chip_id=0xffff07030002, name="FD725"),
1039    ], A6xxGPUInfo(
1040        CHIP.A7XX,
1041        [a7xx_base, a7xx_gen1, A7XXProps(cmdbuf_start_a725_quirk = True)],
1042        num_ccu = 4,
1043        tile_align_w = 64,
1044        tile_align_h = 32,
1045        num_vsc_pipes = 32,
1046        cs_shared_mem_size = 32 * 1024,
1047        wave_granularity = 2,
1048        fibers_per_sp = 128 * 2 * 16,
1049        highest_bank_bit = 16,
1050        magic_regs = a730_magic_regs,
1051        raw_magic_regs = a730_raw_magic_regs,
1052    ))
1053
# FD730: maps both chip ids below to one shared A6xxGPUInfo built from
# a7xx_base + a7xx_gen1 and the shared a730 register tables.
1054add_gpus([
1055        GPUId(chip_id=0x07030001, name="FD730"), # KGSL, no speedbin data
1056        GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback
1057    ], A6xxGPUInfo(
1058        CHIP.A7XX,
1059        [a7xx_base, a7xx_gen1],
1060        num_ccu = 4,
1061        tile_align_w = 64,
1062        tile_align_h = 32,
1063        num_vsc_pipes = 32,
1064        cs_shared_mem_size = 32 * 1024,
1065        wave_granularity = 2,
1066        fibers_per_sp = 128 * 2 * 16,
1067        highest_bank_bit = 16,
1068        magic_regs = a730_magic_regs,
1069        raw_magic_regs = a730_raw_magic_regs,
1070    ))
1071
# FD735: a single chip id (no 0xffff-prefixed variant is registered here).
# Props are a7xx_base + a7xx_gen2 followed by an override that turns
# enable_tp_ubwc_flag_hint back on (a7xx_gen2 sets it to False).
# highest_bank_bit is not passed, so whatever default A6xxGPUInfo defines
# (outside this chunk) is used.
# The inline tables mirror the a740 ones with a few value changes:
#   magic_regs vs a740_magic_regs: PC_MODE_CNTL, RB_DBG_ECO_CNTL and
#     RB_DBG_ECO_CNTL_blit differ.
#   raw_magic_regs vs a740_raw_magic_regs: UCHE_CACHE_WAYS and
#     PC_UNKNOWN_9E24 differ.
1072add_gpus([
1073        GPUId(chip_id=0x43030B00, name="FD735")
1074    ], A6xxGPUInfo(
1075        CHIP.A7XX,
1076        [a7xx_base, a7xx_gen2, A7XXProps(enable_tp_ubwc_flag_hint = True)],
1077        num_ccu = 3,
1078        tile_align_w = 96,
1079        tile_align_h = 32,
1080        num_vsc_pipes = 32,
1081        cs_shared_mem_size = 32 * 1024,
1082        wave_granularity = 2,
1083        fibers_per_sp = 128 * 2 * 16,
1084        magic_regs = dict(
1085            TPL1_DBG_ECO_CNTL = 0x11100000,
1086            GRAS_DBG_ECO_CNTL = 0x00004800,
1087            SP_CHICKEN_BITS = 0x10001400,
1088            UCHE_CLIENT_PF = 0x00000084,
1089            PC_MODE_CNTL = 0x0000001f,
1090            SP_DBG_ECO_CNTL = 0x10000000,
1091            RB_DBG_ECO_CNTL = 0x00000001,
1092            RB_DBG_ECO_CNTL_blit = 0x00000001,  # is it even needed?
1093            RB_UNKNOWN_8E01 = 0x0,
1094            VPC_DBG_ECO_CNTL = 0x02000000,
1095            UCHE_UNKNOWN_0E12 = 0x00000000,
1096
1097            RB_UNKNOWN_8E06 = 0x02080000,
1098        ),
1099        raw_magic_regs = [
1100            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000],
1101            [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724],
1102
1103            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
1104            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800],
1105            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
1106            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
1107            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000],
1108            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
1109            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
1110            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600],
1111            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
1112            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
1113            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
1114            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
1115            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
1116            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
1117            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
1118            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
1119            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
1120
1121            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
1122            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
1123
1124            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
1125            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
1126            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
1127            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],
1128
1129            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2,   0x00000000],
1130            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
1131            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4,   0x00000000],
1132            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
1133            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6,   0x00000000],
1134            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],
1135
1136            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],
1137
1138            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79,   0x00000000],
1139            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899,   0x00000000],
1140            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5,   0x00000000],
1141            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34,   0x00000000],
1142
1143            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
1144        ],
1145    ))
1146
# FD740: maps three ids to one shared A6xxGPUInfo built from
# a7xx_base + a7xx_gen2 and the shared a740 register tables.
# GPUId(740) goes through the legacy gpu_id path: GPUId.__init__ splits 740
# into core=7, major=4, minor=0 and derives chip_id 0x070400ff from it.
1147add_gpus([
1148        GPUId(740), # Deprecated, used for dev kernels.
1149        GPUId(chip_id=0x43050a01, name="FD740"), # KGSL, no speedbin data
1150        GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback
1151    ], A6xxGPUInfo(
1152        CHIP.A7XX,
1153        [a7xx_base, a7xx_gen2],
1154        num_ccu = 6,
1155        tile_align_w = 96,
1156        tile_align_h = 32,
1157        num_vsc_pipes = 32,
1158        cs_shared_mem_size = 32 * 1024,
1159        wave_granularity = 2,
1160        fibers_per_sp = 128 * 2 * 16,
1161        highest_bank_bit = 16,
1162        magic_regs = a740_magic_regs,
1163        raw_magic_regs = a740_raw_magic_regs,
1164    ))
1165
# Adreno X1-85: registered under a single 0xffff-prefixed chip id.
# Same parameters and register tables as the FD740 entry, with
# A7XXProps(compute_constlen_quirk = True) appended to the props list.
1166add_gpus([
1167        GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"),
1168    ], A6xxGPUInfo(
1169        CHIP.A7XX,
1170        [a7xx_base, a7xx_gen2, A7XXProps(compute_constlen_quirk = True)],
1171        num_ccu = 6,
1172        tile_align_w = 96,
1173        tile_align_h = 32,
1174        num_vsc_pipes = 32,
1175        cs_shared_mem_size = 32 * 1024,
1176        wave_granularity = 2,
1177        fibers_per_sp = 128 * 2 * 16,
1178        highest_bank_bit = 16,
1179        magic_regs = a740_magic_regs,
1180        raw_magic_regs = a740_raw_magic_regs,
1181    ))
1182
# FDA32: maps both chip ids below to one shared A6xxGPUInfo.
# Props are a7xx_base + a7xx_gen2 plus cmdbuf_start_a725_quirk; magic_regs
# reuses a740_magic_regs. highest_bank_bit is not passed, so whatever default
# A6xxGPUInfo defines (outside this chunk) is used.
# The inline raw_magic_regs differs from a740_raw_magic_regs in the values of
# TPL1_DBG_ECO_CNTL1, SP_UNKNOWN_AE09 and UCHE_UNKNOWN_0E11, and it omits the
# RB_UNKNOWN_8C34 and GRAS_UNKNOWN_8008 entries.
1183# Values from blob v676.0
1184add_gpus([
1185        GPUId(chip_id=0x43050a00, name="FDA32"), # Adreno A32 (G3x Gen 2)
1186        GPUId(chip_id=0xffff43050a00, name="FDA32"),
1187    ], A6xxGPUInfo(
1188        CHIP.A7XX,
1189        [a7xx_base, a7xx_gen2, A7XXProps(cmdbuf_start_a725_quirk = True)],
1190        num_ccu = 6,
1191        tile_align_w = 96,
1192        tile_align_h = 32,
1193        num_vsc_pipes = 32,
1194        cs_shared_mem_size = 32 * 1024,
1195        wave_granularity = 2,
1196        fibers_per_sp = 128 * 2 * 16,
1197        magic_regs = a740_magic_regs,
1198        raw_magic_regs = [
1199            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004],
1200            [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00000700],
1201
1202            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
1203            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430820],
1204            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
1205            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
1206            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080],
1207            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
1208            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
1209            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600],
1210            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
1211            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
1212            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
1213            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
1214            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
1215            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
1216            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
1217            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
1218            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
1219
1220            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
1221            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
1222
1223            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
1224            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
1225            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
1226            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],
1227
1228            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2,   0x00000000],
1229            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
1230            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4,   0x00000000],
1231            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
1232            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6,   0x00000000],
1233            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],
1234
1235            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],
1236
1237            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79,   0x00000000],
1238            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899,   0x00000000],
1239            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5,   0x00000000],
1240        ],
1241    ))
1242
# FD740v3: maps both chip ids below to one shared A6xxGPUInfo.
# Props are a7xx_base + a7xx_gen2 followed by an override that turns
# enable_tp_ubwc_flag_hint back on (a7xx_gen2 sets it to False).
# highest_bank_bit is not passed, so whatever default A6xxGPUInfo defines
# (outside this chunk) is used.
# The inline magic_regs matches a740_magic_regs except for RB_DBG_ECO_CNTL
# (0x1 here, 0x0 there); raw_magic_regs reuses a740_raw_magic_regs.
1243add_gpus([
1244        GPUId(chip_id=0x43050b00, name="FD740v3"), # Quest 3
1245        GPUId(chip_id=0xffff43050b00, name="FD740v3"),
1246    ], A6xxGPUInfo(
1247        CHIP.A7XX,
1248        [a7xx_base, a7xx_gen2, A7XXProps(enable_tp_ubwc_flag_hint = True)],
1249        num_ccu = 6,
1250        tile_align_w = 96,
1251        tile_align_h = 32,
1252        num_vsc_pipes = 32,
1253        cs_shared_mem_size = 32 * 1024,
1254        wave_granularity = 2,
1255        fibers_per_sp = 128 * 2 * 16,
1256        magic_regs = dict(
1257            # PC_POWER_CNTL = 7,
1258            TPL1_DBG_ECO_CNTL = 0x11100000,
1259            GRAS_DBG_ECO_CNTL = 0x00004800,
1260            SP_CHICKEN_BITS = 0x10001400,
1261            UCHE_CLIENT_PF = 0x00000084,
1262            # Blob uses 0x1f or 0x1f1f, however these values cause vertex
1263            # corruption in some tests.
1264            PC_MODE_CNTL = 0x0000003f,
1265            SP_DBG_ECO_CNTL = 0x10000000,
1266            RB_DBG_ECO_CNTL = 0x00000001,
1267            RB_DBG_ECO_CNTL_blit = 0x00000000,  # is it even needed?
1268            # HLSQ_DBG_ECO_CNTL = 0x0,
1269            RB_UNKNOWN_8E01 = 0x0,
1270            VPC_DBG_ECO_CNTL = 0x02000000,
1271            UCHE_UNKNOWN_0E12 = 0x00000000,
1272
1273            RB_UNKNOWN_8E06 = 0x02080000,
1274        ),
1275        raw_magic_regs = a740_raw_magic_regs,
1276    ))
1277
# FD750: maps both chip ids below to one shared A6xxGPUInfo built from
# a7xx_base + a7xx_gen3 (the only user of a7xx_gen3 in this chunk).
# Both register tables are defined inline rather than shared. Unlike the
# other magic_regs dicts in this chunk, this one has no UCHE_CLIENT_PF key.
1278add_gpus([
1279        GPUId(chip_id=0x43051401, name="FD750"), # KGSL, no speedbin data
1280        GPUId(chip_id=0xffff43051401, name="FD750"), # Default no-speedbin fallback
1281    ], A6xxGPUInfo(
1282        CHIP.A7XX,
1283        [a7xx_base, a7xx_gen3],
1284        num_ccu = 6,
1285        tile_align_w = 96,
1286        tile_align_h = 32,
1287        num_vsc_pipes = 32,
1288        cs_shared_mem_size = 32 * 1024,
1289        wave_granularity = 2,
1290        fibers_per_sp = 128 * 2 * 16,
1291        highest_bank_bit = 16,
1292        magic_regs = dict(
1293            TPL1_DBG_ECO_CNTL = 0x11100000,
1294            GRAS_DBG_ECO_CNTL = 0x00004800,
1295            SP_CHICKEN_BITS = 0x10000400,
1296            PC_MODE_CNTL = 0x00003f1f,
1297            SP_DBG_ECO_CNTL = 0x10000000,
1298            RB_DBG_ECO_CNTL = 0x00000001,
1299            RB_DBG_ECO_CNTL_blit = 0x00000001,
1300            RB_UNKNOWN_8E01 = 0x0,
1301            VPC_DBG_ECO_CNTL = 0x02000000,
1302            UCHE_UNKNOWN_0E12 = 0x40000000,
1303
1304            RB_UNKNOWN_8E06 = 0x02082000,
1305        ),
1306        raw_magic_regs = [
1307            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000],
1308            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
1309            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080],
1310            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000000],
1311            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00431800],
1312            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00800000],
1313            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
1314            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
1315            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600],
1316            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
1317            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
1318            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
1319            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
1320            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
1321            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
1322            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
1323            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
1324            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
1325            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
1326            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
1327            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
1328            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
1329            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
1330            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],
1331
1332            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2,   0x00000000],
1333            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
1334            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4,   0x00000000],
1335            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
1336            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6,   0x00000000],
1337            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],
1338
1339            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],
1340
1341            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899,   0x00000000],
1342            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5,   0x00000000],
1343            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34,   0x00000000],
1344
1345            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
1346
1347            [0x930a, 0],
1348            [0x960a, 1],
1349            [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS_CONTROL, 0],
1350            [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS, 0],
1351        ],
1352    ))
1353
# Mako template for the generated C source. It emits, in order:
#   * one `static const struct fd_dev_info __info<N>` per entry of
#     s.gpu_infos, where N is s.info_index(info) and the initializer is
#     str(info);
#   * the fd_dev_recs[] table, one row per (id, info) pair in s.gpus, pointing
#     at the matching __info<N>;
#   * fd_dev_info_apply_dbg_options(), which reads the FD_DEV_FEATURES option
#     as ':'-separated `name[=value]` items and, for each (prop, gen) key in
#     unique_props, generates a strcmp() branch that overwrites
#     info-><gen>.<prop>. Boolean props are parsed with
#     debug_parse_bool_option, everything else with debug_parse_num_option.
#     An item whose name matches no property logs an error and calls exit(1).
# In the generated loop, `feature` is advanced to the next item before the
# per-property checks, which is what makes the `continue` in each branch safe.
# The `\\"` sequences are Python escapes that yield `\"` in the C output.
# The `True`/`False` #defines let Python booleans appear verbatim in the
# generated initializers.
1354template = """\
1355/* Copyright © 2021 Google, Inc.
1356 *
1357 * SPDX-License-Identifier: MIT
1358 */
1359
1360#include "freedreno_dev_info.h"
1361#include "util/u_debug.h"
1362#include "util/log.h"
1363
1364#include <stdlib.h>
1365
1366/* Map python to C: */
1367#define True true
1368#define False false
1369
1370%for info in s.gpu_infos:
1371static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
1372%endfor
1373
1374static const struct fd_dev_rec fd_dev_recs[] = {
1375%for id, info in s.gpus.items():
1376   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
1377%endfor
1378};
1379
1380void
1381fd_dev_info_apply_dbg_options(struct fd_dev_info *info)
1382{
1383    const char *env = debug_get_option("FD_DEV_FEATURES", NULL);
1384    if (!env || !*env)
1385        return;
1386
1387    char *features = strdup(env);
1388    char *feature, *feature_end;
1389    feature = strtok_r(features, ":", &feature_end);
1390    while (feature != NULL) {
1391        char *name, *name_end;
1392        name = strtok_r(feature, "=", &name_end);
1393
1394        if (!name) {
1395            mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", feature);
1396            exit(1);
1397        }
1398
1399        char *value = strtok_r(NULL, "=", &name_end);
1400
1401        feature = strtok_r(NULL, ":", &feature_end);
1402
1403%for (prop, gen), val in unique_props.items():
1404  <%
1405    if isinstance(val, bool):
1406        parse_value = "debug_parse_bool_option"
1407    else:
1408        parse_value = "debug_parse_num_option"
1409  %>
1410        if (strcmp(name, "${prop}") == 0) {
1411            info->${gen}.${prop} = ${parse_value}(value, info->${gen}.${prop});
1412            continue;
1413        }
1414%endfor
1415
1416        mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", name);
1417        exit(1);
1418    }
1419
1420    free(features);
1421}
1422"""
1423
# Render the template with the module-level State `s` and
# A6XXProps.unique_props, and write the generated C source to stdout.
1424print(Template(template).render(s=s, unique_props=A6XXProps.unique_props))
1425
1426