#
# Copyright © 2021 Google, Inc.
#
# SPDX-License-Identifier: MIT

from mako.template import Template
import sys
import argparse
from enum import Enum

def max_bitfield_val(high, low, shift):
    """Return ``((1 << (high - low)) - 1) << shift``.

    Note that the mask is ``high - low`` bits wide (not ``high - low + 1``);
    e.g. ``max_bitfield_val(14, 8, 4) == 1008``.
    """
    return ((1 << (high - low)) - 1) << shift


# The directory holding the generated 'a6xx' module is passed on the command
# line and must be on sys.path before the import below.
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--import-path', required=True)
args = parser.parse_args()
sys.path.insert(0, args.import_path)

from a6xx import *


class CHIP(Enum):
    """GPU generation; the value is the generation number."""
    A2XX = 2
    A3XX = 3
    A4XX = 4
    A5XX = 5
    A6XX = 6
    A7XX = 7

class CCUColorCacheFraction(Enum):
    """Symbolic values for the gmem_ccu_color_cache_fraction property."""
    FULL = 0
    HALF = 1
    QUARTER = 2
    EIGHTH = 3


class State(object):
    """Registry of every device-info struct and the GPU ids mapped to them."""
    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of gpu_info within self.gpu_infos.

        Raises:
            ValueError: if gpu_info is not a registered info struct.
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        # The original raised an undefined name here; use a real exception.
        raise ValueError("invalid info")

s = State()

def add_gpus(ids, info):
    """Map every GPUId in ids to the shared device-info struct info."""
    for gpu_id in ids:
        s.gpus[gpu_id] = info

class GPUId(object):
    """Identifies one GPU by legacy numeric gpu_id and/or chip_id.

    When chip_id is omitted it is derived from the three decimal digits of
    gpu_id; when name is omitted it defaults to "FD<gpu_id>".
    """
    def __init__(self, gpu_id = None, chip_id = None, name=None):
        if chip_id is None:
            assert gpu_id is not None
            # Split e.g. 630 into core=6, major=3, minor=0 using integer
            # arithmetic only.
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            # Low byte is always 0xff here (presumably a wildcard for the
            # patch level -- TODO confirm against the chip_id consumers).
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            # Without a numeric id there is nothing to build a name from.
            assert gpu_id != 0
            name = "FD%d" % gpu_id
        self.name = name

class Struct(object):
    """A helper class that stringifies itself to a 'C' struct initializer
    """
    def __str__(self):
        # One ".name=value," designator per instance attribute, in
        # attribute-definition order.
        fields = "".join("." + name + "=" + str(value) + ","
                         for name, value in vars(self).items())
        return "{" + fields + "}"

class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w
       tends to have lower limits, in which case a comment will describe
       the bitfield size/shift
    """
    def __init__(self, chip, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes,
                 cs_shared_mem_size, num_sp_cores, wave_granularity, fibers_per_sp,
                 highest_bank_bit = 0, ubwc_swizzle = 0x7, macrotile_mode = 0,
                 threadsize_base = 64, max_waves = 16):
        # Store the plain integer so the struct stringifies to a C literal.
        self.chip          = chip.value
        self.gmem_align_w  = gmem_align_w
        self.gmem_align_h  = gmem_align_h
        self.tile_align_w  = tile_align_w
        self.tile_align_h  = tile_align_h
        self.tile_max_w    = tile_max_w
        self.tile_max_h    = tile_max_h
        self.num_vsc_pipes = num_vsc_pipes
        self.cs_shared_mem_size = cs_shared_mem_size
        self.num_sp_cores  = num_sp_cores
        self.wave_granularity = wave_granularity
        self.fibers_per_sp = fibers_per_sp
        self.threadsize_base = threadsize_base
        self.max_waves     = max_waves
        self.highest_bank_bit = highest_bank_bit
        self.ubwc_swizzle = ubwc_swizzle
        self.macrotile_mode = macrotile_mode

        # Every constructed info struct registers itself in the global state.
        s.gpu_infos.append(self)


class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       template may be a single props object or a list of them; they are
       applied in order, so later entries override earlier ones.
    """
    def __init__(self, chip, template, num_ccu,
                 tile_align_w, tile_align_h, num_vsc_pipes,
                 cs_shared_mem_size, wave_granularity, fibers_per_sp,
                 magic_regs, raw_magic_regs = None, highest_bank_bit = 15,
                 ubwc_swizzle = 0x6, macrotile_mode = 1,
                 threadsize_base = 64, max_waves = 16):
        if chip == CHIP.A6XX:
            tile_max_w = 1024 # max_bitfield_val(5, 0, 5)
            tile_max_h = max_bitfield_val(14, 8, 4) # 1008
        else:
            tile_max_w = 1728
            tile_max_h = 1728

        super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = tile_align_w,
                         tile_align_h = tile_align_h,
                         tile_max_w   = tile_max_w,
                         tile_max_h   = tile_max_h,
                         num_vsc_pipes = num_vsc_pipes,
                         cs_shared_mem_size = cs_shared_mem_size,
                         num_sp_cores = num_ccu, # The # of SP cores seems to always match # of CCU
                         wave_granularity   = wave_granularity,
                         fibers_per_sp      = fibers_per_sp,
                         highest_bank_bit = highest_bank_bit,
                         ubwc_swizzle = ubwc_swizzle,
                         macrotile_mode = macrotile_mode,
                         threadsize_base    = threadsize_base,
                         max_waves    = max_waves)

        self.num_ccu = num_ccu

        # Per-generation property containers filled in by the templates.
        self.a6xx = Struct()
        self.a7xx = Struct()

        self.a6xx.magic = Struct()

        for name, val in magic_regs.items():
            setattr(self.a6xx.magic, name, val)

        # magic_raw is only emitted when raw register writes were supplied.
        if raw_magic_regs:
            self.a6xx.magic_raw = [[int(r[0]), r[1]] for r in raw_magic_regs]

        templates = template if isinstance(template, list) else [template]
        for props in templates:
            props.apply_props(self)


    def __str__(self):
        # Python list syntax ([..]) becomes a C array initializer ({..}).
        return super().__str__().replace('[', '{').replace("]", "}")


# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus([
        GPUId(200),
        GPUId(201),
        GPUId(205),
        GPUId(220),
    ], GPUInfo(
        CHIP.A2XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 512,
        tile_max_h   = ~0, # TODO
        num_vsc_pipes = 8,
        cs_shared_mem_size = 0,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 8, # TODO: Confirm this
    ))

add_gpus([
        GPUId(305),
        GPUId(307),
        GPUId(320),
        GPUId(330),
        GPUId(chip_id=0x03000512, name="FD305B"),
        GPUId(chip_id=0x03000620, name="FD306A"),
    ], GPUInfo(
        CHIP.A3XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 992, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 8,
    ))

add_gpus([
        GPUId(405),
        GPUId(420),
        GPUId(430),
    ], GPUInfo(
        CHIP.A4XX,
        gmem_align_w = 32,  gmem_align_h = 32,
        tile_align_w = 32,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(4, 0, 5)
        tile_max_h   = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 0, # TODO
        wave_granularity = 2,
        fibers_per_sp = 0, # TODO
        threadsize_base = 32, # TODO: Confirm this
    ))

add_gpus([
        GPUId(505),
        GPUId(506),
        GPUId(508),
        GPUId(509),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 1,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        highest_bank_bit = 14,
        threadsize_base = 32,
    ))

add_gpus([
        GPUId(510),
        GPUId(512),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 2,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        highest_bank_bit = 14,
        threadsize_base = 32,
    ))

add_gpus([
        GPUId(530),
        GPUId(540),
    ], GPUInfo(
        CHIP.A5XX,
        gmem_align_w = 64,  gmem_align_h = 32,
        tile_align_w = 64,  tile_align_h = 32,
        tile_max_w   = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h   = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
        cs_shared_mem_size = 32 * 1024,
        num_sp_cores = 4,
        wave_granularity = 2,
        fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd.
        highest_bank_bit = 15,
        threadsize_base = 32,
    ))


class A6XXProps(dict):
    """A dict of property name -> value that can be applied to a GPU info.

    unique_props is shared by all instances (and subclasses) and records
    every (name, generation) pair that has ever been applied, with the most
    recently applied value.
    """
    unique_props = dict()
    def apply_gen_props(self, gen, gpu_info):
        """Copy every property onto gpu_info.<gen> and record it globally."""
        for name, val in self.items():
            setattr(getattr(gpu_info, gen), name, val)
            A6XXProps.unique_props[(name, gen)] = val

    def apply_props(self, gpu_info):
        """Apply these properties to the "a6xx" sub-struct of gpu_info."""
        self.apply_gen_props("a6xx", gpu_info)


class A7XXProps(A6XXProps):
    """Same as A6XXProps, but targets the "a7xx" sub-struct."""
    def apply_props(self, gpu_info):
        """Apply these properties to the "a7xx" sub-struct of gpu_info."""
        self.apply_gen_props("a7xx", gpu_info)


# Props could be modified with env var:
#  FD_DEV_FEATURES=%feature_name%=%value%:%feature_name%=%value%:...
# e.g.
320# FD_DEV_FEATURES=has_fs_tex_prefetch=0:max_sets=4 321 322a6xx_base = A6XXProps( 323 has_cp_reg_write = True, 324 has_8bpp_ubwc = True, 325 has_gmem_fast_clear = True, 326 has_hw_multiview = True, 327 has_fs_tex_prefetch = True, 328 has_sampler_minmax = True, 329 330 supports_double_threadsize = True, 331 332 sysmem_per_ccu_depth_cache_size = 64 * 1024, 333 sysmem_per_ccu_color_cache_size = 64 * 1024, 334 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value, 335 336 prim_alloc_threshold = 0x7, 337 vs_max_inputs_count = 32, 338 max_sets = 5, 339 line_width_min = 1.0, 340 line_width_max = 1.0, 341 ) 342 343 344# a6xx and a7xx can be divided into distinct sub-generations, where certain 345# device-info parameters are keyed to the sub-generation. These templates 346# reduce the copypaste 347 348a6xx_gen1_low = A6XXProps( 349 reg_size_vec4 = 48, 350 instr_cache_size = 64, 351 indirect_draw_wfm_quirk = True, 352 depth_bounds_require_depth_test_quirk = True, 353 354 has_gmem_fast_clear = False, 355 has_hw_multiview = False, 356 has_sampler_minmax = False, 357 has_fs_tex_prefetch = False, 358 sysmem_per_ccu_color_cache_size = 8 * 1024, 359 sysmem_per_ccu_depth_cache_size = 8 * 1024, 360 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value, 361 vs_max_inputs_count = 16, 362 supports_double_threadsize = False, 363 ) 364 365a6xx_gen1 = A6XXProps( 366 reg_size_vec4 = 96, 367 instr_cache_size = 64, 368 indirect_draw_wfm_quirk = True, 369 depth_bounds_require_depth_test_quirk = True, 370 ) 371 372a6xx_gen2 = A6XXProps( 373 reg_size_vec4 = 96, 374 instr_cache_size = 64, # TODO 375 supports_multiview_mask = True, 376 has_z24uint_s8uint = True, 377 indirect_draw_wfm_quirk = True, 378 depth_bounds_require_depth_test_quirk = True, # TODO: check if true 379 has_dp2acc = False, # TODO: check if true 380 has_8bpp_ubwc = False, 381 ) 382 383a6xx_gen3 = A6XXProps( 384 reg_size_vec4 = 64, 385 # Blob limits it to 128 but we hang with 128 386 instr_cache_size = 
127, 387 supports_multiview_mask = True, 388 has_z24uint_s8uint = True, 389 tess_use_shared = True, 390 storage_16bit = True, 391 has_tex_filter_cubic = True, 392 has_separate_chroma_filter = True, 393 has_sample_locations = True, 394 has_8bpp_ubwc = False, 395 has_dp2acc = True, 396 has_lrz_dir_tracking = True, 397 enable_lrz_fast_clear = True, 398 lrz_track_quirk = True, 399 has_lrz_feedback = True, 400 has_per_view_viewport = True, 401 has_scalar_alu = True, 402 has_early_preamble = True, 403 prede_nop_quirk = True, 404 ) 405 406a6xx_gen4 = A6XXProps( 407 reg_size_vec4 = 64, 408 # Blob limits it to 128 but we hang with 128 409 instr_cache_size = 127, 410 supports_multiview_mask = True, 411 has_z24uint_s8uint = True, 412 tess_use_shared = True, 413 storage_16bit = True, 414 has_tex_filter_cubic = True, 415 has_separate_chroma_filter = True, 416 has_sample_locations = True, 417 has_cp_reg_write = False, 418 has_8bpp_ubwc = False, 419 has_lpac = True, 420 has_legacy_pipeline_shading_rate = True, 421 has_getfiberid = True, 422 has_dp2acc = True, 423 has_dp4acc = True, 424 enable_lrz_fast_clear = True, 425 has_lrz_dir_tracking = True, 426 has_lrz_feedback = True, 427 has_per_view_viewport = True, 428 has_scalar_alu = True, 429 has_isam_v = True, 430 has_ssbo_imm_offsets = True, 431 has_ubwc_linear_mipmap_fallback = True, 432 # TODO: there seems to be a quirk where at least rcp can't be in an 433 # early preamble. a660 at least is affected. 
434 #has_early_preamble = True, 435 prede_nop_quirk = True, 436 predtf_nop_quirk = True, 437 has_sad = True, 438 ) 439 440add_gpus([ 441 GPUId(605), # TODO: Test it, based only on libwrapfake dumps 442 GPUId(608), # TODO: Test it, based only on libwrapfake dumps 443 GPUId(610), 444 GPUId(612), # TODO: Test it, based only on libwrapfake dumps 445 ], A6xxGPUInfo( 446 CHIP.A6XX, 447 [a6xx_base, a6xx_gen1_low], 448 num_ccu = 1, 449 tile_align_w = 32, 450 tile_align_h = 16, 451 num_vsc_pipes = 16, 452 cs_shared_mem_size = 16 * 1024, 453 wave_granularity = 1, 454 fibers_per_sp = 128 * 16, 455 highest_bank_bit = 13, 456 ubwc_swizzle = 0x7, 457 macrotile_mode = 0, 458 magic_regs = dict( 459 PC_POWER_CNTL = 0, 460 TPL1_DBG_ECO_CNTL = 0, 461 GRAS_DBG_ECO_CNTL = 0, 462 SP_CHICKEN_BITS = 0, 463 UCHE_CLIENT_PF = 0x00000004, 464 PC_MODE_CNTL = 0xf, 465 SP_DBG_ECO_CNTL = 0x0, 466 RB_DBG_ECO_CNTL = 0x04100000, 467 RB_DBG_ECO_CNTL_blit = 0x04100000, 468 HLSQ_DBG_ECO_CNTL = 0, 469 RB_UNKNOWN_8E01 = 0x00000001, 470 VPC_DBG_ECO_CNTL = 0x0, 471 UCHE_UNKNOWN_0E12 = 0x10000000, 472 ), 473 )) 474 475add_gpus([ 476 GPUId(615), 477 GPUId(616), 478 GPUId(618), 479 GPUId(619), 480 ], A6xxGPUInfo( 481 CHIP.A6XX, 482 [a6xx_base, a6xx_gen1], 483 num_ccu = 1, 484 tile_align_w = 32, 485 tile_align_h = 32, 486 num_vsc_pipes = 32, 487 cs_shared_mem_size = 32 * 1024, 488 wave_granularity = 2, 489 fibers_per_sp = 128 * 16, 490 highest_bank_bit = 14, 491 macrotile_mode = 0, 492 magic_regs = dict( 493 PC_POWER_CNTL = 0, 494 TPL1_DBG_ECO_CNTL = 0x00108000, 495 GRAS_DBG_ECO_CNTL = 0x00000880, 496 SP_CHICKEN_BITS = 0x00000430, 497 UCHE_CLIENT_PF = 0x00000004, 498 PC_MODE_CNTL = 0x1f, 499 SP_DBG_ECO_CNTL = 0x0, 500 RB_DBG_ECO_CNTL = 0x04100000, 501 RB_DBG_ECO_CNTL_blit = 0x04100000, 502 HLSQ_DBG_ECO_CNTL = 0x00080000, 503 RB_UNKNOWN_8E01 = 0x00000001, 504 VPC_DBG_ECO_CNTL = 0x0, 505 UCHE_UNKNOWN_0E12 = 0x00000001 506 ) 507 )) 508 509add_gpus([ 510 GPUId(620), 511 ], A6xxGPUInfo( 512 CHIP.A6XX, 513 
[a6xx_base, a6xx_gen1], 514 num_ccu = 1, 515 tile_align_w = 32, 516 tile_align_h = 16, 517 num_vsc_pipes = 32, 518 cs_shared_mem_size = 32 * 1024, 519 wave_granularity = 2, 520 fibers_per_sp = 128 * 16, 521 magic_regs = dict( 522 PC_POWER_CNTL = 0, 523 TPL1_DBG_ECO_CNTL = 0x01008000, 524 GRAS_DBG_ECO_CNTL = 0x0, 525 SP_CHICKEN_BITS = 0x00000400, 526 UCHE_CLIENT_PF = 0x00000004, 527 PC_MODE_CNTL = 0x1f, 528 SP_DBG_ECO_CNTL = 0x01000000, 529 RB_DBG_ECO_CNTL = 0x04100000, 530 RB_DBG_ECO_CNTL_blit = 0x04100000, 531 HLSQ_DBG_ECO_CNTL = 0x0, 532 RB_UNKNOWN_8E01 = 0x0, 533 VPC_DBG_ECO_CNTL = 0x02000000, 534 UCHE_UNKNOWN_0E12 = 0x00000001 535 ) 536 )) 537 538add_gpus([ 539 GPUId(chip_id=0xffff06020100, name="FD621"), 540 ], A6xxGPUInfo( 541 CHIP.A6XX, 542 [a6xx_base, a6xx_gen3, A6XXProps(lrz_track_quirk = False)], 543 num_ccu = 2, 544 tile_align_w = 96, 545 tile_align_h = 16, 546 num_vsc_pipes = 32, 547 cs_shared_mem_size = 32 * 1024, 548 wave_granularity = 2, 549 fibers_per_sp = 128 * 2 * 16, 550 magic_regs = dict( 551 PC_POWER_CNTL = 0, 552 # this seems to be a chicken bit that fixes cubic filtering: 553 TPL1_DBG_ECO_CNTL = 0x01008000, 554 GRAS_DBG_ECO_CNTL = 0x0, 555 SP_CHICKEN_BITS = 0x00001400, 556 # UCHE_CLIENT_PF = 0x00000004, 557 PC_MODE_CNTL = 0x1f, 558 SP_DBG_ECO_CNTL = 0x03000000, 559 RB_DBG_ECO_CNTL = 0x04100000, 560 RB_DBG_ECO_CNTL_blit = 0x04100000, 561 HLSQ_DBG_ECO_CNTL = 0x0, 562 RB_UNKNOWN_8E01 = 0x0, 563 VPC_DBG_ECO_CNTL = 0x02000000, 564 UCHE_UNKNOWN_0E12 = 0x00000001 565 ) 566 )) 567 568add_gpus([ 569 GPUId(630), 570 ], A6xxGPUInfo( 571 CHIP.A6XX, 572 [a6xx_base, a6xx_gen1], 573 num_ccu = 2, 574 tile_align_w = 32, 575 tile_align_h = 16, 576 num_vsc_pipes = 32, 577 cs_shared_mem_size = 32 * 1024, 578 wave_granularity = 2, 579 fibers_per_sp = 128 * 16, 580 highest_bank_bit = 15, 581 macrotile_mode = 0, 582 magic_regs = dict( 583 PC_POWER_CNTL = 1, 584 TPL1_DBG_ECO_CNTL = 0x00108000, 585 GRAS_DBG_ECO_CNTL = 0x00000880, 586 SP_CHICKEN_BITS = 0x00001430, 587 
UCHE_CLIENT_PF = 0x00000004, 588 PC_MODE_CNTL = 0x1f, 589 SP_DBG_ECO_CNTL = 0x0, 590 RB_DBG_ECO_CNTL = 0x04100000, 591 RB_DBG_ECO_CNTL_blit = 0x05100000, 592 HLSQ_DBG_ECO_CNTL = 0x00080000, 593 RB_UNKNOWN_8E01 = 0x00000001, 594 VPC_DBG_ECO_CNTL = 0x0, 595 UCHE_UNKNOWN_0E12 = 0x10000001 596 ) 597 )) 598 599add_gpus([ 600 GPUId(640), 601 ], A6xxGPUInfo( 602 CHIP.A6XX, 603 [a6xx_base, a6xx_gen2], 604 num_ccu = 2, 605 tile_align_w = 32, 606 tile_align_h = 16, 607 num_vsc_pipes = 32, 608 cs_shared_mem_size = 32 * 1024, 609 wave_granularity = 2, 610 fibers_per_sp = 128 * 4 * 16, 611 highest_bank_bit = 15, 612 macrotile_mode = 0, 613 magic_regs = dict( 614 PC_POWER_CNTL = 1, 615 TPL1_DBG_ECO_CNTL = 0x00008000, 616 GRAS_DBG_ECO_CNTL = 0x0, 617 SP_CHICKEN_BITS = 0x00000420, 618 UCHE_CLIENT_PF = 0x00000004, 619 PC_MODE_CNTL = 0x1f, 620 SP_DBG_ECO_CNTL = 0x0, 621 RB_DBG_ECO_CNTL = 0x04100000, 622 RB_DBG_ECO_CNTL_blit = 0x04100000, 623 HLSQ_DBG_ECO_CNTL = 0x0, 624 RB_UNKNOWN_8E01 = 0x00000001, 625 VPC_DBG_ECO_CNTL = 0x02000000, 626 UCHE_UNKNOWN_0E12 = 0x00000001 627 ) 628 )) 629 630add_gpus([ 631 GPUId(680), 632 ], A6xxGPUInfo( 633 CHIP.A6XX, 634 [a6xx_base, a6xx_gen2], 635 num_ccu = 4, 636 tile_align_w = 64, 637 tile_align_h = 32, 638 num_vsc_pipes = 32, 639 cs_shared_mem_size = 32 * 1024, 640 wave_granularity = 2, 641 fibers_per_sp = 128 * 4 * 16, 642 highest_bank_bit = 15, 643 macrotile_mode = 0, 644 magic_regs = dict( 645 PC_POWER_CNTL = 3, 646 TPL1_DBG_ECO_CNTL = 0x00108000, 647 GRAS_DBG_ECO_CNTL = 0x0, 648 SP_CHICKEN_BITS = 0x00001430, 649 UCHE_CLIENT_PF = 0x00000004, 650 PC_MODE_CNTL = 0x1f, 651 SP_DBG_ECO_CNTL = 0x0, 652 RB_DBG_ECO_CNTL = 0x04100000, 653 RB_DBG_ECO_CNTL_blit = 0x04100000, 654 HLSQ_DBG_ECO_CNTL = 0x0, 655 RB_UNKNOWN_8E01 = 0x00000001, 656 VPC_DBG_ECO_CNTL = 0x02000000, 657 UCHE_UNKNOWN_0E12 = 0x00000001 658 ) 659 )) 660 661add_gpus([ 662 GPUId(650), 663 ], A6xxGPUInfo( 664 CHIP.A6XX, 665 [a6xx_base, a6xx_gen3], 666 num_ccu = 3, 667 tile_align_w = 96, 
668 tile_align_h = 16, 669 num_vsc_pipes = 32, 670 cs_shared_mem_size = 32 * 1024, 671 wave_granularity = 2, 672 fibers_per_sp = 128 * 2 * 16, 673 highest_bank_bit = 16, 674 magic_regs = dict( 675 PC_POWER_CNTL = 2, 676 # this seems to be a chicken bit that fixes cubic filtering: 677 TPL1_DBG_ECO_CNTL = 0x01008000, 678 GRAS_DBG_ECO_CNTL = 0x0, 679 SP_CHICKEN_BITS = 0x00001400, 680 UCHE_CLIENT_PF = 0x00000004, 681 PC_MODE_CNTL = 0x1f, 682 SP_DBG_ECO_CNTL = 0x01000000, 683 RB_DBG_ECO_CNTL = 0x04100000, 684 RB_DBG_ECO_CNTL_blit = 0x04100000, 685 HLSQ_DBG_ECO_CNTL = 0x0, 686 RB_UNKNOWN_8E01 = 0x0, 687 VPC_DBG_ECO_CNTL = 0x02000000, 688 UCHE_UNKNOWN_0E12 = 0x00000001 689 ) 690 )) 691 692add_gpus([ 693 # These are all speedbins/variants of A635 694 GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"), 695 GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"), 696 GPUId(chip_id=0x006006030500, name="Adreno 7c+ Gen 3 Lite"), 697 GPUId(chip_id=0x00ac06030500, name="FD643"), # e.g. QCM6490, Fairphone 5 698 # fallback wildcard entry should be last: 699 GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"), 700 ], A6xxGPUInfo( 701 CHIP.A6XX, 702 [a6xx_base, a6xx_gen4], 703 num_ccu = 2, 704 tile_align_w = 32, 705 tile_align_h = 16, 706 num_vsc_pipes = 32, 707 cs_shared_mem_size = 32 * 1024, 708 wave_granularity = 2, 709 fibers_per_sp = 128 * 2 * 16, 710 highest_bank_bit = 14, 711 magic_regs = dict( 712 PC_POWER_CNTL = 1, 713 TPL1_DBG_ECO_CNTL = 0x05008000, 714 GRAS_DBG_ECO_CNTL = 0x0, 715 SP_CHICKEN_BITS = 0x00001400, 716 UCHE_CLIENT_PF = 0x00000084, 717 PC_MODE_CNTL = 0x1f, 718 SP_DBG_ECO_CNTL = 0x00000006, 719 RB_DBG_ECO_CNTL = 0x04100000, 720 RB_DBG_ECO_CNTL_blit = 0x04100000, 721 HLSQ_DBG_ECO_CNTL = 0x0, 722 RB_UNKNOWN_8E01 = 0x0, 723 VPC_DBG_ECO_CNTL = 0x02000000, 724 UCHE_UNKNOWN_0E12 = 0x00000001 725 ) 726 )) 727 728add_gpus([ 729 GPUId(660), 730 ], A6xxGPUInfo( 731 CHIP.A6XX, 732 [a6xx_base, a6xx_gen4], 733 num_ccu = 3, 734 tile_align_w = 96, 735 tile_align_h = 16, 
736 num_vsc_pipes = 32, 737 cs_shared_mem_size = 32 * 1024, 738 wave_granularity = 2, 739 fibers_per_sp = 128 * 2 * 16, 740 highest_bank_bit = 16, 741 magic_regs = dict( 742 PC_POWER_CNTL = 2, 743 TPL1_DBG_ECO_CNTL = 0x05008000, 744 GRAS_DBG_ECO_CNTL = 0x0, 745 SP_CHICKEN_BITS = 0x00001400, 746 UCHE_CLIENT_PF = 0x00000084, 747 PC_MODE_CNTL = 0x1f, 748 SP_DBG_ECO_CNTL = 0x01000000, 749 RB_DBG_ECO_CNTL = 0x04100000, 750 RB_DBG_ECO_CNTL_blit = 0x04100000, 751 HLSQ_DBG_ECO_CNTL = 0x0, 752 RB_UNKNOWN_8E01 = 0x0, 753 VPC_DBG_ECO_CNTL = 0x02000000, 754 UCHE_UNKNOWN_0E12 = 0x00000001 755 ) 756 )) 757 758add_gpus([ 759 GPUId(chip_id=0x6060201, name="FD644"), # Called A662 in kgsl 760 GPUId(chip_id=0xffff06060300, name="FD663"), 761 ], A6xxGPUInfo( 762 CHIP.A6XX, 763 [a6xx_base, a6xx_gen4], 764 num_ccu = 3, 765 tile_align_w = 96, 766 tile_align_h = 16, 767 num_vsc_pipes = 32, 768 cs_shared_mem_size = 32 * 1024, 769 wave_granularity = 2, 770 fibers_per_sp = 128 * 4 * 16, 771 magic_regs = dict( 772 PC_POWER_CNTL = 2, 773 TPL1_DBG_ECO_CNTL = 0x05008000, 774 GRAS_DBG_ECO_CNTL = 0x0, 775 SP_CHICKEN_BITS = 0x00001400, 776 UCHE_CLIENT_PF = 0x00000084, 777 PC_MODE_CNTL = 0x1f, 778 SP_DBG_ECO_CNTL = 0x6, 779 RB_DBG_ECO_CNTL = 0x04100000, 780 RB_DBG_ECO_CNTL_blit = 0x04100000, 781 HLSQ_DBG_ECO_CNTL = 0x0, 782 RB_UNKNOWN_8E01 = 0x0, 783 VPC_DBG_ECO_CNTL = 0x02000000, 784 UCHE_UNKNOWN_0E12 = 0x00000001 785 ) 786 )) 787 788add_gpus([ 789 GPUId(690), 790 GPUId(chip_id=0xffff06090000, name="FD690"), # Default no-speedbin fallback 791 ], A6xxGPUInfo( 792 CHIP.A6XX, 793 [a6xx_base, a6xx_gen4, A6XXProps(broken_ds_ubwc_quirk = True)], 794 num_ccu = 8, 795 tile_align_w = 64, 796 tile_align_h = 32, 797 num_vsc_pipes = 32, 798 cs_shared_mem_size = 32 * 1024, 799 wave_granularity = 2, 800 fibers_per_sp = 128 * 2 * 16, 801 highest_bank_bit = 16, 802 magic_regs = dict( 803 PC_POWER_CNTL = 7, 804 TPL1_DBG_ECO_CNTL = 0x04c00000, 805 GRAS_DBG_ECO_CNTL = 0x0, 806 SP_CHICKEN_BITS = 0x00001400, 807 
UCHE_CLIENT_PF = 0x00000084, 808 PC_MODE_CNTL = 0x1f, 809 SP_DBG_ECO_CNTL = 0x1200000, 810 RB_DBG_ECO_CNTL = 0x100000, 811 RB_DBG_ECO_CNTL_blit = 0x00100000, # ??? 812 HLSQ_DBG_ECO_CNTL = 0x0, 813 RB_UNKNOWN_8E01 = 0x0, 814 VPC_DBG_ECO_CNTL = 0x2000400, 815 UCHE_UNKNOWN_0E12 = 0x00000001 816 ), 817 raw_magic_regs = [ 818 [A6XXRegs.REG_A6XX_SP_UNKNOWN_AAF2, 0x00c00000], 819 ], 820 )) 821 822# Based on a6xx_base + a6xx_gen4 823a7xx_base = A6XXProps( 824 has_gmem_fast_clear = True, 825 has_hw_multiview = True, 826 has_fs_tex_prefetch = True, 827 has_sampler_minmax = True, 828 829 supports_double_threadsize = True, 830 831 sysmem_per_ccu_depth_cache_size = 256 * 1024, 832 sysmem_per_ccu_color_cache_size = 64 * 1024, 833 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.EIGHTH.value, 834 835 prim_alloc_threshold = 0x7, 836 vs_max_inputs_count = 32, 837 max_sets = 8, 838 839 reg_size_vec4 = 96, 840 # Blob limits it to 128 but we hang with 128 841 instr_cache_size = 127, 842 supports_multiview_mask = True, 843 has_z24uint_s8uint = True, 844 tess_use_shared = True, 845 storage_16bit = True, 846 has_tex_filter_cubic = True, 847 has_separate_chroma_filter = True, 848 has_sample_locations = True, 849 has_lpac = True, 850 has_getfiberid = True, 851 has_dp2acc = True, 852 has_dp4acc = True, 853 enable_lrz_fast_clear = True, 854 has_lrz_dir_tracking = True, 855 has_lrz_feedback = True, 856 has_per_view_viewport = True, 857 line_width_min = 1.0, 858 line_width_max = 127.5, 859 has_scalar_alu = True, 860 has_coherent_ubwc_flag_caches = True, 861 has_isam_v = True, 862 has_ssbo_imm_offsets = True, 863 has_early_preamble = True, 864 has_attachment_shading_rate = True, 865 has_ubwc_linear_mipmap_fallback = True, 866 prede_nop_quirk = True, 867 predtf_nop_quirk = True, 868 has_sad = True, 869 ) 870 871a7xx_gen1 = A7XXProps( 872 supports_ibo_ubwc = True, 873 fs_must_have_non_zero_constlen_quirk = True, 874 enable_tp_ubwc_flag_hint = True, 875 
reading_shading_rate_requires_smask_quirk = True, 876 ) 877 878a7xx_gen2 = A7XXProps( 879 stsc_duplication_quirk = True, 880 has_event_write_sample_count = True, 881 ubwc_unorm_snorm_int_compatible = True, 882 supports_ibo_ubwc = True, 883 fs_must_have_non_zero_constlen_quirk = True, 884 # Most devices with a740 have blob v6xx which doesn't have 885 # this hint set. Match them for better compatibility by default. 886 enable_tp_ubwc_flag_hint = False, 887 has_64b_ssbo_atomics = True, 888 has_primitive_shading_rate = True, 889 reading_shading_rate_requires_smask_quirk = True, 890 ) 891 892a7xx_gen3 = A7XXProps( 893 has_event_write_sample_count = True, 894 load_inline_uniforms_via_preamble_ldgk = True, 895 load_shader_consts_via_preamble = True, 896 has_gmem_vpc_attr_buf = True, 897 sysmem_vpc_attr_buf_size = 0x20000, 898 gmem_vpc_attr_buf_size = 0xc000, 899 ubwc_unorm_snorm_int_compatible = True, 900 supports_ibo_ubwc = True, 901 has_generic_clear = True, 902 r8g8_faulty_fast_clear_quirk = True, 903 gs_vpc_adjacency_quirk = True, 904 storage_8bit = True, 905 ubwc_all_formats_compatible = True, 906 has_compliant_dp4acc = True, 907 ubwc_coherency_quirk = True, 908 has_persistent_counter = True, 909 has_64b_ssbo_atomics = True, 910 has_primitive_shading_rate = True, 911 ) 912 913a730_magic_regs = dict( 914 TPL1_DBG_ECO_CNTL = 0x1000000, 915 GRAS_DBG_ECO_CNTL = 0x800, 916 SP_CHICKEN_BITS = 0x1440, 917 UCHE_CLIENT_PF = 0x00000084, 918 PC_MODE_CNTL = 0x0000003f, # 0x00001f1f in some tests 919 SP_DBG_ECO_CNTL = 0x10000000, 920 RB_DBG_ECO_CNTL = 0x00000000, 921 RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed? 
922 RB_UNKNOWN_8E01 = 0x0, 923 VPC_DBG_ECO_CNTL = 0x02000000, 924 UCHE_UNKNOWN_0E12 = 0x3200000, 925 926 RB_UNKNOWN_8E06 = 0x02080000, 927 ) 928 929a730_raw_magic_regs = [ 930 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00840004], 931 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724], 932 933 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00002400], 934 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00000000], 935 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 936 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 937 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000040], 938 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00008000], 939 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x20080000], 940 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21fc7f00], 941 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00000000], 942 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 943 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 944 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 945 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 946 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 947 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 948 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 949 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 950 951 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 952 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 953 954 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 955 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 956 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 957 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 958 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 959 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 960 961 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 962 963 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], 964 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 965 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 966 ] 967 968a740_magic_regs = dict( 969 # PC_POWER_CNTL = 7, 970 TPL1_DBG_ECO_CNTL = 0x11100000, 971 GRAS_DBG_ECO_CNTL = 0x00004800, 972 
SP_CHICKEN_BITS = 0x10001400, 973 UCHE_CLIENT_PF = 0x00000084, 974 # Blob uses 0x1f or 0x1f1f, however these values cause vertices 975 # corruption in some tests. 976 PC_MODE_CNTL = 0x0000003f, 977 SP_DBG_ECO_CNTL = 0x10000000, 978 RB_DBG_ECO_CNTL = 0x00000000, 979 RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed? 980 # HLSQ_DBG_ECO_CNTL = 0x0, 981 RB_UNKNOWN_8E01 = 0x0, 982 VPC_DBG_ECO_CNTL = 0x02000000, 983 UCHE_UNKNOWN_0E12 = 0x00000000, 984 985 RB_UNKNOWN_8E06 = 0x02080000, 986 ) 987 988a740_raw_magic_regs = [ 989 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004], 990 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724], 991 992 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400], 993 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800], 994 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 995 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 996 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000], 997 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], 998 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], 999 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600], 1000 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], 1001 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 1002 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 1003 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 1004 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 1005 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 1006 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 1007 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 1008 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 1009 1010 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 1011 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 1012 1013 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], 1014 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], 1015 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], 1016 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], 1017 1018 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 1019 
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 1020 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 1021 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 1022 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 1023 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 1024 1025 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 1026 1027 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], 1028 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 1029 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 1030 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], 1031 1032 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], 1033 ] 1034 1035add_gpus([ 1036 # These are named as Adreno730v3 or Adreno725v1. 1037 GPUId(chip_id=0x07030002, name="FD725"), 1038 GPUId(chip_id=0xffff07030002, name="FD725"), 1039 ], A6xxGPUInfo( 1040 CHIP.A7XX, 1041 [a7xx_base, a7xx_gen1, A7XXProps(cmdbuf_start_a725_quirk = True)], 1042 num_ccu = 4, 1043 tile_align_w = 64, 1044 tile_align_h = 32, 1045 num_vsc_pipes = 32, 1046 cs_shared_mem_size = 32 * 1024, 1047 wave_granularity = 2, 1048 fibers_per_sp = 128 * 2 * 16, 1049 highest_bank_bit = 16, 1050 magic_regs = a730_magic_regs, 1051 raw_magic_regs = a730_raw_magic_regs, 1052 )) 1053 1054add_gpus([ 1055 GPUId(chip_id=0x07030001, name="FD730"), # KGSL, no speedbin data 1056 GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback 1057 ], A6xxGPUInfo( 1058 CHIP.A7XX, 1059 [a7xx_base, a7xx_gen1], 1060 num_ccu = 4, 1061 tile_align_w = 64, 1062 tile_align_h = 32, 1063 num_vsc_pipes = 32, 1064 cs_shared_mem_size = 32 * 1024, 1065 wave_granularity = 2, 1066 fibers_per_sp = 128 * 2 * 16, 1067 highest_bank_bit = 16, 1068 magic_regs = a730_magic_regs, 1069 raw_magic_regs = a730_raw_magic_regs, 1070 )) 1071 1072add_gpus([ 1073 GPUId(chip_id=0x43030B00, name="FD735") 1074 ], A6xxGPUInfo( 1075 CHIP.A7XX, 1076 [a7xx_base, a7xx_gen2, A7XXProps(enable_tp_ubwc_flag_hint = True)], 1077 num_ccu = 3, 1078 tile_align_w = 96, 1079 tile_align_h = 32, 1080 
num_vsc_pipes = 32, 1081 cs_shared_mem_size = 32 * 1024, 1082 wave_granularity = 2, 1083 fibers_per_sp = 128 * 2 * 16, 1084 magic_regs = dict( 1085 TPL1_DBG_ECO_CNTL = 0x11100000, 1086 GRAS_DBG_ECO_CNTL = 0x00004800, 1087 SP_CHICKEN_BITS = 0x10001400, 1088 UCHE_CLIENT_PF = 0x00000084, 1089 PC_MODE_CNTL = 0x0000001f, 1090 SP_DBG_ECO_CNTL = 0x10000000, 1091 RB_DBG_ECO_CNTL = 0x00000001, 1092 RB_DBG_ECO_CNTL_blit = 0x00000001, # is it even needed? 1093 RB_UNKNOWN_8E01 = 0x0, 1094 VPC_DBG_ECO_CNTL = 0x02000000, 1095 UCHE_UNKNOWN_0E12 = 0x00000000, 1096 1097 RB_UNKNOWN_8E06 = 0x02080000, 1098 ), 1099 raw_magic_regs = [ 1100 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000], 1101 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724], 1102 1103 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400], 1104 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800], 1105 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 1106 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 1107 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000], 1108 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], 1109 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], 1110 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600], 1111 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], 1112 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 1113 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 1114 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 1115 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 1116 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 1117 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 1118 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 1119 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 1120 1121 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 1122 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 1123 1124 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], 1125 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], 1126 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], 1127 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 
0x00000000], 1128 1129 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 1130 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 1131 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 1132 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 1133 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 1134 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 1135 1136 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 1137 1138 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], 1139 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 1140 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 1141 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], 1142 1143 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], 1144 ], 1145 )) 1146 1147add_gpus([ 1148 GPUId(740), # Deprecated, used for dev kernels. 1149 GPUId(chip_id=0x43050a01, name="FD740"), # KGSL, no speedbin data 1150 GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback 1151 ], A6xxGPUInfo( 1152 CHIP.A7XX, 1153 [a7xx_base, a7xx_gen2], 1154 num_ccu = 6, 1155 tile_align_w = 96, 1156 tile_align_h = 32, 1157 num_vsc_pipes = 32, 1158 cs_shared_mem_size = 32 * 1024, 1159 wave_granularity = 2, 1160 fibers_per_sp = 128 * 2 * 16, 1161 highest_bank_bit = 16, 1162 magic_regs = a740_magic_regs, 1163 raw_magic_regs = a740_raw_magic_regs, 1164 )) 1165 1166add_gpus([ 1167 GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"), 1168 ], A6xxGPUInfo( 1169 CHIP.A7XX, 1170 [a7xx_base, a7xx_gen2, A7XXProps(compute_constlen_quirk = True)], 1171 num_ccu = 6, 1172 tile_align_w = 96, 1173 tile_align_h = 32, 1174 num_vsc_pipes = 32, 1175 cs_shared_mem_size = 32 * 1024, 1176 wave_granularity = 2, 1177 fibers_per_sp = 128 * 2 * 16, 1178 highest_bank_bit = 16, 1179 magic_regs = a740_magic_regs, 1180 raw_magic_regs = a740_raw_magic_regs, 1181 )) 1182 1183# Values from blob v676.0 1184add_gpus([ 1185 GPUId(chip_id=0x43050a00, name="FDA32"), # Adreno A32 (G3x Gen 2) 1186 GPUId(chip_id=0xffff43050a00, name="FDA32"), 1187 ], A6xxGPUInfo( 1188 CHIP.A7XX, 
1189 [a7xx_base, a7xx_gen2, A7XXProps(cmdbuf_start_a725_quirk = True)], 1190 num_ccu = 6, 1191 tile_align_w = 96, 1192 tile_align_h = 32, 1193 num_vsc_pipes = 32, 1194 cs_shared_mem_size = 32 * 1024, 1195 wave_granularity = 2, 1196 fibers_per_sp = 128 * 2 * 16, 1197 magic_regs = a740_magic_regs, 1198 raw_magic_regs = [ 1199 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004], 1200 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00000700], 1201 1202 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400], 1203 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430820], 1204 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 1205 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 1206 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080], 1207 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], 1208 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], 1209 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600], 1210 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], 1211 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 1212 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 1213 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 1214 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 1215 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 1216 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 1217 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 1218 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 1219 1220 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 1221 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 1222 1223 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], 1224 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], 1225 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], 1226 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], 1227 1228 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 1229 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 1230 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 1231 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 1232 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 1233 
[A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 1234 1235 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 1236 1237 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], 1238 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 1239 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 1240 ], 1241 )) 1242 1243add_gpus([ 1244 GPUId(chip_id=0x43050b00, name="FD740v3"), # Quest 3 1245 GPUId(chip_id=0xffff43050b00, name="FD740v3"), 1246 ], A6xxGPUInfo( 1247 CHIP.A7XX, 1248 [a7xx_base, a7xx_gen2, A7XXProps(enable_tp_ubwc_flag_hint = True)], 1249 num_ccu = 6, 1250 tile_align_w = 96, 1251 tile_align_h = 32, 1252 num_vsc_pipes = 32, 1253 cs_shared_mem_size = 32 * 1024, 1254 wave_granularity = 2, 1255 fibers_per_sp = 128 * 2 * 16, 1256 magic_regs = dict( 1257 # PC_POWER_CNTL = 7, 1258 TPL1_DBG_ECO_CNTL = 0x11100000, 1259 GRAS_DBG_ECO_CNTL = 0x00004800, 1260 SP_CHICKEN_BITS = 0x10001400, 1261 UCHE_CLIENT_PF = 0x00000084, 1262 # Blob uses 0x1f or 0x1f1f, however these values cause vertices 1263 # corruption in some tests. 1264 PC_MODE_CNTL = 0x0000003f, 1265 SP_DBG_ECO_CNTL = 0x10000000, 1266 RB_DBG_ECO_CNTL = 0x00000001, 1267 RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed? 
1268 # HLSQ_DBG_ECO_CNTL = 0x0, 1269 RB_UNKNOWN_8E01 = 0x0, 1270 VPC_DBG_ECO_CNTL = 0x02000000, 1271 UCHE_UNKNOWN_0E12 = 0x00000000, 1272 1273 RB_UNKNOWN_8E06 = 0x02080000, 1274 ), 1275 raw_magic_regs = a740_raw_magic_regs, 1276 )) 1277 1278add_gpus([ 1279 GPUId(chip_id=0x43051401, name="FD750"), # KGSL, no speedbin data 1280 GPUId(chip_id=0xffff43051401, name="FD750"), # Default no-speedbin fallback 1281 ], A6xxGPUInfo( 1282 CHIP.A7XX, 1283 [a7xx_base, a7xx_gen3], 1284 num_ccu = 6, 1285 tile_align_w = 96, 1286 tile_align_h = 32, 1287 num_vsc_pipes = 32, 1288 cs_shared_mem_size = 32 * 1024, 1289 wave_granularity = 2, 1290 fibers_per_sp = 128 * 2 * 16, 1291 highest_bank_bit = 16, 1292 magic_regs = dict( 1293 TPL1_DBG_ECO_CNTL = 0x11100000, 1294 GRAS_DBG_ECO_CNTL = 0x00004800, 1295 SP_CHICKEN_BITS = 0x10000400, 1296 PC_MODE_CNTL = 0x00003f1f, 1297 SP_DBG_ECO_CNTL = 0x10000000, 1298 RB_DBG_ECO_CNTL = 0x00000001, 1299 RB_DBG_ECO_CNTL_blit = 0x00000001, 1300 RB_UNKNOWN_8E01 = 0x0, 1301 VPC_DBG_ECO_CNTL = 0x02000000, 1302 UCHE_UNKNOWN_0E12 = 0x40000000, 1303 1304 RB_UNKNOWN_8E06 = 0x02082000, 1305 ), 1306 raw_magic_regs = [ 1307 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000], 1308 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 1309 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080], 1310 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000000], 1311 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00431800], 1312 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00800000], 1313 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000], 1314 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000], 1315 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600], 1316 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000], 1317 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 1318 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 1319 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 1320 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 1321 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 1322 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 
0x00000000], 1323 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 1324 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 1325 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 1326 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 1327 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000], 1328 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000], 1329 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000], 1330 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000], 1331 1332 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 1333 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 1334 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 1335 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 1336 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 1337 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 1338 1339 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 1340 1341 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 1342 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 1343 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000], 1344 1345 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000], 1346 1347 [0x930a, 0], 1348 [0x960a, 1], 1349 [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS_CONTROL, 0], 1350 [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS, 0], 1351 ], 1352 )) 1353 1354template = """\ 1355/* Copyright © 2021 Google, Inc. 
1356 * 1357 * SPDX-License-Identifier: MIT 1358 */ 1359 1360#include "freedreno_dev_info.h" 1361#include "util/u_debug.h" 1362#include "util/log.h" 1363 1364#include <stdlib.h> 1365 1366/* Map python to C: */ 1367#define True true 1368#define False false 1369 1370%for info in s.gpu_infos: 1371static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)}; 1372%endfor 1373 1374static const struct fd_dev_rec fd_dev_recs[] = { 1375%for id, info in s.gpus.items(): 1376 { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} }, 1377%endfor 1378}; 1379 1380void 1381fd_dev_info_apply_dbg_options(struct fd_dev_info *info) 1382{ 1383 const char *env = debug_get_option("FD_DEV_FEATURES", NULL); 1384 if (!env || !*env) 1385 return; 1386 1387 char *features = strdup(env); 1388 char *feature, *feature_end; 1389 feature = strtok_r(features, ":", &feature_end); 1390 while (feature != NULL) { 1391 char *name, *name_end; 1392 name = strtok_r(feature, "=", &name_end); 1393 1394 if (!name) { 1395 mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", feature); 1396 exit(1); 1397 } 1398 1399 char *value = strtok_r(NULL, "=", &name_end); 1400 1401 feature = strtok_r(NULL, ":", &feature_end); 1402 1403%for (prop, gen), val in unique_props.items(): 1404 <% 1405 if isinstance(val, bool): 1406 parse_value = "debug_parse_bool_option" 1407 else: 1408 parse_value = "debug_parse_num_option" 1409 %> 1410 if (strcmp(name, "${prop}") == 0) { 1411 info->${gen}.${prop} = ${parse_value}(value, info->${gen}.${prop}); 1412 continue; 1413 } 1414%endfor 1415 1416 mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", name); 1417 exit(1); 1418 } 1419 1420 free(features); 1421} 1422""" 1423 1424print(Template(template).render(s=s, unique_props=A6XXProps.unique_props)) 1425 1426