1# 2# Copyright © 2021 Google, Inc. 3# 4# SPDX-License-Identifier: MIT 5 6from mako.template import Template 7import sys 8import argparse 9from enum import Enum 10 11def max_bitfield_val(high, low, shift): 12 return ((1 << (high - low)) - 1) << shift 13 14 15parser = argparse.ArgumentParser() 16parser.add_argument('-p', '--import-path', required=True) 17args = parser.parse_args() 18sys.path.insert(0, args.import_path) 19 20from a6xx import * 21 22 23class CHIP(Enum): 24 A2XX = 2 25 A3XX = 3 26 A4XX = 4 27 A5XX = 5 28 A6XX = 6 29 A7XX = 7 30 31class CCUColorCacheFraction(Enum): 32 FULL = 0 33 HALF = 1 34 QUARTER = 2 35 EIGHTH = 3 36 37 38class State(object): 39 def __init__(self): 40 # List of unique device-info structs, multiple different GPU ids 41 # can map to a single info struct in cases where the differences 42 # are not sw visible, or the only differences are parameters 43 # queried from the kernel (like GMEM size) 44 self.gpu_infos = [] 45 46 # Table mapping GPU id to device-info struct 47 self.gpus = {} 48 49 def info_index(self, gpu_info): 50 i = 0 51 for info in self.gpu_infos: 52 if gpu_info == info: 53 return i 54 i += 1 55 raise Error("invalid info") 56 57s = State() 58 59def add_gpus(ids, info): 60 for id in ids: 61 s.gpus[id] = info 62 63class GPUId(object): 64 def __init__(self, gpu_id = None, chip_id = None, name=None): 65 if chip_id is None: 66 assert(gpu_id is not None) 67 val = gpu_id 68 core = int(val / 100) 69 val -= (core * 100) 70 major = int(val / 10) 71 val -= (major * 10) 72 minor = val 73 chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff 74 self.chip_id = chip_id 75 if gpu_id is None: 76 gpu_id = 0 77 self.gpu_id = gpu_id 78 if name is None: 79 assert(gpu_id != 0) 80 name = "FD%d" % gpu_id 81 self.name = name 82 83class Struct(object): 84 """A helper class that stringifies itself to a 'C' struct initializer 85 """ 86 def __str__(self): 87 s = "{" 88 for name, value in vars(self).items(): 89 s += "." 
+ name + "=" + str(value) + "," 90 return s + "}" 91 92class GPUInfo(Struct): 93 """Base class for any generation of adreno, consists of GMEM layout 94 related parameters 95 96 Note that tile_max_h is normally only constrained by corresponding 97 bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_h 98 tends to have lower limits, in which case a comment will describe 99 the bitfield size/shift 100 """ 101 def __init__(self, chip, gmem_align_w, gmem_align_h, 102 tile_align_w, tile_align_h, 103 tile_max_w, tile_max_h, num_vsc_pipes, 104 cs_shared_mem_size, num_sp_cores, wave_granularity, fibers_per_sp, 105 highest_bank_bit = 0, ubwc_swizzle = 0x7, macrotile_mode = 0, 106 threadsize_base = 64, max_waves = 16): 107 self.chip = chip.value 108 self.gmem_align_w = gmem_align_w 109 self.gmem_align_h = gmem_align_h 110 self.tile_align_w = tile_align_w 111 self.tile_align_h = tile_align_h 112 self.tile_max_w = tile_max_w 113 self.tile_max_h = tile_max_h 114 self.num_vsc_pipes = num_vsc_pipes 115 self.cs_shared_mem_size = cs_shared_mem_size 116 self.num_sp_cores = num_sp_cores 117 self.wave_granularity = wave_granularity 118 self.fibers_per_sp = fibers_per_sp 119 self.threadsize_base = threadsize_base 120 self.max_waves = max_waves 121 self.highest_bank_bit = highest_bank_bit 122 self.ubwc_swizzle = ubwc_swizzle 123 self.macrotile_mode = macrotile_mode 124 125 s.gpu_infos.append(self) 126 127 128class A6xxGPUInfo(GPUInfo): 129 """The a6xx generation has a lot more parameters, and is broken down 130 into distinct sub-generations. The template parameter avoids 131 duplication of parameters that are unique to the sub-generation. 
132 """ 133 def __init__(self, chip, template, num_ccu, 134 tile_align_w, tile_align_h, num_vsc_pipes, 135 cs_shared_mem_size, wave_granularity, fibers_per_sp, 136 magic_regs, raw_magic_regs = None, highest_bank_bit = 15, 137 ubwc_swizzle = 0x6, macrotile_mode = 1, 138 threadsize_base = 64, max_waves = 16): 139 if chip == CHIP.A6XX: 140 tile_max_w = 1024 # max_bitfield_val(5, 0, 5) 141 tile_max_h = max_bitfield_val(14, 8, 4) # 1008 142 else: 143 tile_max_w = 1728 144 tile_max_h = 1728 145 146 super().__init__(chip, gmem_align_w = 16, gmem_align_h = 4, 147 tile_align_w = tile_align_w, 148 tile_align_h = tile_align_h, 149 tile_max_w = tile_max_w, 150 tile_max_h = tile_max_h, 151 num_vsc_pipes = num_vsc_pipes, 152 cs_shared_mem_size = cs_shared_mem_size, 153 num_sp_cores = num_ccu, # The # of SP cores seems to always match # of CCU 154 wave_granularity = wave_granularity, 155 fibers_per_sp = fibers_per_sp, 156 highest_bank_bit = highest_bank_bit, 157 ubwc_swizzle = ubwc_swizzle, 158 macrotile_mode = macrotile_mode, 159 threadsize_base = threadsize_base, 160 max_waves = max_waves) 161 162 self.num_ccu = num_ccu 163 164 self.a6xx = Struct() 165 self.a7xx = Struct() 166 167 self.a6xx.magic = Struct() 168 169 for name, val in magic_regs.items(): 170 setattr(self.a6xx.magic, name, val) 171 172 if raw_magic_regs: 173 self.a6xx.magic_raw = [[int(r[0]), r[1]] for r in raw_magic_regs] 174 175 templates = template if isinstance(template, list) else [template] 176 for template in templates: 177 template.apply_props(self) 178 179 180 def __str__(self): 181 return super(A6xxGPUInfo, self).__str__().replace('[', '{').replace("]", "}") 182 183 184# a2xx is really two sub-generations, a20x and a22x, but we don't currently 185# capture that in the device-info tables 186add_gpus([ 187 GPUId(200), 188 GPUId(201), 189 GPUId(205), 190 GPUId(220), 191 ], GPUInfo( 192 CHIP.A2XX, 193 gmem_align_w = 32, gmem_align_h = 32, 194 tile_align_w = 32, tile_align_h = 32, 195 tile_max_w = 512, 196 
tile_max_h = ~0, # TODO 197 num_vsc_pipes = 8, 198 cs_shared_mem_size = 0, 199 num_sp_cores = 0, # TODO 200 wave_granularity = 2, 201 fibers_per_sp = 0, # TODO 202 threadsize_base = 8, # TODO: Confirm this 203 )) 204 205add_gpus([ 206 GPUId(305), 207 GPUId(307), 208 GPUId(320), 209 GPUId(330), 210 GPUId(chip_id=0x03000512, name="FD305B"), 211 GPUId(chip_id=0x03000620, name="FD306A"), 212 ], GPUInfo( 213 CHIP.A3XX, 214 gmem_align_w = 32, gmem_align_h = 32, 215 tile_align_w = 32, tile_align_h = 32, 216 tile_max_w = 992, # max_bitfield_val(4, 0, 5) 217 tile_max_h = max_bitfield_val(9, 5, 5), 218 num_vsc_pipes = 8, 219 cs_shared_mem_size = 32 * 1024, 220 num_sp_cores = 0, # TODO 221 wave_granularity = 2, 222 fibers_per_sp = 0, # TODO 223 threadsize_base = 8, 224 )) 225 226add_gpus([ 227 GPUId(405), 228 GPUId(420), 229 GPUId(430), 230 ], GPUInfo( 231 CHIP.A4XX, 232 gmem_align_w = 32, gmem_align_h = 32, 233 tile_align_w = 32, tile_align_h = 32, 234 tile_max_w = 1024, # max_bitfield_val(4, 0, 5) 235 tile_max_h = max_bitfield_val(9, 5, 5), 236 num_vsc_pipes = 8, 237 cs_shared_mem_size = 32 * 1024, 238 num_sp_cores = 0, # TODO 239 wave_granularity = 2, 240 fibers_per_sp = 0, # TODO 241 threadsize_base = 32, # TODO: Confirm this 242 )) 243 244add_gpus([ 245 GPUId(505), 246 GPUId(506), 247 GPUId(508), 248 GPUId(509), 249 ], GPUInfo( 250 CHIP.A5XX, 251 gmem_align_w = 64, gmem_align_h = 32, 252 tile_align_w = 64, tile_align_h = 32, 253 tile_max_w = 1024, # max_bitfield_val(7, 0, 5) 254 tile_max_h = max_bitfield_val(16, 9, 5), 255 num_vsc_pipes = 16, 256 cs_shared_mem_size = 32 * 1024, 257 num_sp_cores = 1, 258 wave_granularity = 2, 259 fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd. 
260 highest_bank_bit = 14, 261 threadsize_base = 32, 262 )) 263 264add_gpus([ 265 GPUId(510), 266 GPUId(512), 267 ], GPUInfo( 268 CHIP.A5XX, 269 gmem_align_w = 64, gmem_align_h = 32, 270 tile_align_w = 64, tile_align_h = 32, 271 tile_max_w = 1024, # max_bitfield_val(7, 0, 5) 272 tile_max_h = max_bitfield_val(16, 9, 5), 273 num_vsc_pipes = 16, 274 cs_shared_mem_size = 32 * 1024, 275 num_sp_cores = 2, 276 wave_granularity = 2, 277 fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd. 278 highest_bank_bit = 14, 279 threadsize_base = 32, 280 )) 281 282add_gpus([ 283 GPUId(530), 284 GPUId(540), 285 ], GPUInfo( 286 CHIP.A5XX, 287 gmem_align_w = 64, gmem_align_h = 32, 288 tile_align_w = 64, tile_align_h = 32, 289 tile_max_w = 1024, # max_bitfield_val(7, 0, 5) 290 tile_max_h = max_bitfield_val(16, 9, 5), 291 num_vsc_pipes = 16, 292 cs_shared_mem_size = 32 * 1024, 293 num_sp_cores = 4, 294 wave_granularity = 2, 295 fibers_per_sp = 64 * 16, # Lowest number that didn't fault on spillall fs-varying-array-mat4-col-row-rd. 296 highest_bank_bit = 15, 297 threadsize_base = 32, 298 )) 299 300 301class A6XXProps(dict): 302 unique_props = dict() 303 def apply_gen_props(self, gen, gpu_info): 304 for name, val in self.items(): 305 setattr(getattr(gpu_info, gen), name, val) 306 A6XXProps.unique_props[(name, gen)] = val 307 308 def apply_props(self, gpu_info): 309 self.apply_gen_props("a6xx", gpu_info) 310 311 312class A7XXProps(A6XXProps): 313 def apply_props(self, gpu_info): 314 self.apply_gen_props("a7xx", gpu_info) 315 316 317# Props could be modified with env var: 318# FD_DEV_FEATURES=%feature_name%=%value%:%feature_name%=%value%:... 319# e.g. 
320# FD_DEV_FEATURES=has_fs_tex_prefetch=0:max_sets=4 321 322a6xx_base = A6XXProps( 323 has_cp_reg_write = True, 324 has_8bpp_ubwc = True, 325 has_gmem_fast_clear = True, 326 has_hw_multiview = True, 327 has_fs_tex_prefetch = True, 328 has_sampler_minmax = True, 329 330 supports_double_threadsize = True, 331 332 sysmem_per_ccu_depth_cache_size = 64 * 1024, 333 sysmem_per_ccu_color_cache_size = 64 * 1024, 334 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.QUARTER.value, 335 336 prim_alloc_threshold = 0x7, 337 vs_max_inputs_count = 32, 338 max_sets = 5, 339 line_width_min = 1.0, 340 line_width_max = 1.0, 341 ) 342 343 344# a6xx and a7xx can be divided into distinct sub-generations, where certain 345# device-info parameters are keyed to the sub-generation. These templates 346# reduce the copypaste 347 348a6xx_gen1_low = A6XXProps( 349 reg_size_vec4 = 48, 350 instr_cache_size = 64, 351 indirect_draw_wfm_quirk = True, 352 depth_bounds_require_depth_test_quirk = True, 353 354 has_gmem_fast_clear = False, 355 has_hw_multiview = False, 356 has_sampler_minmax = False, 357 has_fs_tex_prefetch = False, 358 sysmem_per_ccu_color_cache_size = 8 * 1024, 359 sysmem_per_ccu_depth_cache_size = 8 * 1024, 360 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.HALF.value, 361 vs_max_inputs_count = 16, 362 supports_double_threadsize = False, 363 ) 364 365a6xx_gen1 = A6XXProps( 366 reg_size_vec4 = 96, 367 instr_cache_size = 64, 368 indirect_draw_wfm_quirk = True, 369 depth_bounds_require_depth_test_quirk = True, 370 ) 371 372a6xx_gen2 = A6XXProps( 373 reg_size_vec4 = 96, 374 instr_cache_size = 64, # TODO 375 supports_multiview_mask = True, 376 has_z24uint_s8uint = True, 377 indirect_draw_wfm_quirk = True, 378 depth_bounds_require_depth_test_quirk = True, # TODO: check if true 379 has_dp2acc = False, # TODO: check if true 380 has_8bpp_ubwc = False, 381 ) 382 383a6xx_gen3 = A6XXProps( 384 reg_size_vec4 = 64, 385 # Blob limits it to 128 but we hang with 128 386 instr_cache_size = 
127, 387 supports_multiview_mask = True, 388 has_z24uint_s8uint = True, 389 tess_use_shared = True, 390 storage_16bit = True, 391 has_tex_filter_cubic = True, 392 has_separate_chroma_filter = True, 393 has_sample_locations = True, 394 has_8bpp_ubwc = False, 395 has_dp2acc = True, 396 has_lrz_dir_tracking = True, 397 enable_lrz_fast_clear = True, 398 lrz_track_quirk = True, 399 has_lrz_feedback = True, 400 has_per_view_viewport = True, 401 has_scalar_alu = True, 402 has_early_preamble = True, 403 prede_nop_quirk = True, 404 ) 405 406a6xx_gen4 = A6XXProps( 407 reg_size_vec4 = 64, 408 # Blob limits it to 128 but we hang with 128 409 instr_cache_size = 127, 410 supports_multiview_mask = True, 411 has_z24uint_s8uint = True, 412 tess_use_shared = True, 413 storage_16bit = True, 414 has_tex_filter_cubic = True, 415 has_separate_chroma_filter = True, 416 has_sample_locations = True, 417 has_cp_reg_write = False, 418 has_8bpp_ubwc = False, 419 has_lpac = True, 420 has_legacy_pipeline_shading_rate = True, 421 has_getfiberid = True, 422 has_dp2acc = True, 423 has_dp4acc = True, 424 enable_lrz_fast_clear = True, 425 has_lrz_dir_tracking = True, 426 has_lrz_feedback = True, 427 has_per_view_viewport = True, 428 has_scalar_alu = True, 429 has_isam_v = True, 430 has_ssbo_imm_offsets = True, 431 has_ubwc_linear_mipmap_fallback = True, 432 # TODO: there seems to be a quirk where at least rcp can't be in an 433 # early preamble. a660 at least is affected. 
434 #has_early_preamble = True, 435 prede_nop_quirk = True, 436 predtf_nop_quirk = True, 437 has_sad = True, 438 ) 439 440add_gpus([ 441 GPUId(605), # TODO: Test it, based only on libwrapfake dumps 442 GPUId(608), # TODO: Test it, based only on libwrapfake dumps 443 GPUId(610), 444 GPUId(612), # TODO: Test it, based only on libwrapfake dumps 445 ], A6xxGPUInfo( 446 CHIP.A6XX, 447 [a6xx_base, a6xx_gen1_low], 448 num_ccu = 1, 449 tile_align_w = 32, 450 tile_align_h = 16, 451 num_vsc_pipes = 16, 452 cs_shared_mem_size = 16 * 1024, 453 wave_granularity = 1, 454 fibers_per_sp = 128 * 16, 455 highest_bank_bit = 13, 456 ubwc_swizzle = 0x7, 457 macrotile_mode = 0, 458 magic_regs = dict( 459 PC_POWER_CNTL = 0, 460 TPL1_DBG_ECO_CNTL = 0, 461 GRAS_DBG_ECO_CNTL = 0, 462 SP_CHICKEN_BITS = 0, 463 UCHE_CLIENT_PF = 0x00000004, 464 PC_MODE_CNTL = 0xf, 465 SP_DBG_ECO_CNTL = 0x0, 466 RB_DBG_ECO_CNTL = 0x04100000, 467 RB_DBG_ECO_CNTL_blit = 0x04100000, 468 HLSQ_DBG_ECO_CNTL = 0, 469 RB_UNKNOWN_8E01 = 0x00000001, 470 VPC_DBG_ECO_CNTL = 0x0, 471 UCHE_UNKNOWN_0E12 = 0x10000000, 472 ), 473 )) 474 475add_gpus([ 476 GPUId(615), 477 GPUId(616), 478 GPUId(618), 479 GPUId(619), 480 ], A6xxGPUInfo( 481 CHIP.A6XX, 482 [a6xx_base, a6xx_gen1], 483 num_ccu = 1, 484 tile_align_w = 32, 485 tile_align_h = 32, 486 num_vsc_pipes = 32, 487 cs_shared_mem_size = 32 * 1024, 488 wave_granularity = 2, 489 fibers_per_sp = 128 * 16, 490 highest_bank_bit = 14, 491 macrotile_mode = 0, 492 magic_regs = dict( 493 PC_POWER_CNTL = 0, 494 TPL1_DBG_ECO_CNTL = 0x00108000, 495 GRAS_DBG_ECO_CNTL = 0x00000880, 496 SP_CHICKEN_BITS = 0x00000430, 497 UCHE_CLIENT_PF = 0x00000004, 498 PC_MODE_CNTL = 0x1f, 499 SP_DBG_ECO_CNTL = 0x0, 500 RB_DBG_ECO_CNTL = 0x04100000, 501 RB_DBG_ECO_CNTL_blit = 0x04100000, 502 HLSQ_DBG_ECO_CNTL = 0x00080000, 503 RB_UNKNOWN_8E01 = 0x00000001, 504 VPC_DBG_ECO_CNTL = 0x0, 505 UCHE_UNKNOWN_0E12 = 0x00000001 506 ) 507 )) 508 509add_gpus([ 510 GPUId(620), 511 ], A6xxGPUInfo( 512 CHIP.A6XX, 513 
[a6xx_base, a6xx_gen1], 514 num_ccu = 1, 515 tile_align_w = 32, 516 tile_align_h = 16, 517 num_vsc_pipes = 32, 518 cs_shared_mem_size = 32 * 1024, 519 wave_granularity = 2, 520 fibers_per_sp = 128 * 16, 521 magic_regs = dict( 522 PC_POWER_CNTL = 0, 523 TPL1_DBG_ECO_CNTL = 0x01008000, 524 GRAS_DBG_ECO_CNTL = 0x0, 525 SP_CHICKEN_BITS = 0x00000400, 526 UCHE_CLIENT_PF = 0x00000004, 527 PC_MODE_CNTL = 0x1f, 528 SP_DBG_ECO_CNTL = 0x01000000, 529 RB_DBG_ECO_CNTL = 0x04100000, 530 RB_DBG_ECO_CNTL_blit = 0x04100000, 531 HLSQ_DBG_ECO_CNTL = 0x0, 532 RB_UNKNOWN_8E01 = 0x0, 533 VPC_DBG_ECO_CNTL = 0x02000000, 534 UCHE_UNKNOWN_0E12 = 0x00000001 535 ) 536 )) 537 538add_gpus([ 539 GPUId(chip_id=0xffff06020100, name="FD621"), 540 ], A6xxGPUInfo( 541 CHIP.A6XX, 542 [a6xx_base, a6xx_gen3, A6XXProps(lrz_track_quirk = False)], 543 num_ccu = 2, 544 tile_align_w = 96, 545 tile_align_h = 16, 546 num_vsc_pipes = 32, 547 cs_shared_mem_size = 32 * 1024, 548 wave_granularity = 2, 549 fibers_per_sp = 128 * 2 * 16, 550 magic_regs = dict( 551 PC_POWER_CNTL = 0, 552 # this seems to be a chicken bit that fixes cubic filtering: 553 TPL1_DBG_ECO_CNTL = 0x01008000, 554 GRAS_DBG_ECO_CNTL = 0x0, 555 SP_CHICKEN_BITS = 0x00001400, 556 # UCHE_CLIENT_PF = 0x00000004, 557 PC_MODE_CNTL = 0x1f, 558 SP_DBG_ECO_CNTL = 0x03000000, 559 RB_DBG_ECO_CNTL = 0x04100000, 560 RB_DBG_ECO_CNTL_blit = 0x04100000, 561 HLSQ_DBG_ECO_CNTL = 0x0, 562 RB_UNKNOWN_8E01 = 0x0, 563 VPC_DBG_ECO_CNTL = 0x02000000, 564 UCHE_UNKNOWN_0E12 = 0x00000001 565 ) 566 )) 567 568add_gpus([ 569 GPUId(630), 570 ], A6xxGPUInfo( 571 CHIP.A6XX, 572 [a6xx_base, a6xx_gen1], 573 num_ccu = 2, 574 tile_align_w = 32, 575 tile_align_h = 16, 576 num_vsc_pipes = 32, 577 cs_shared_mem_size = 32 * 1024, 578 wave_granularity = 2, 579 fibers_per_sp = 128 * 16, 580 highest_bank_bit = 15, 581 macrotile_mode = 0, 582 magic_regs = dict( 583 PC_POWER_CNTL = 1, 584 TPL1_DBG_ECO_CNTL = 0x00108000, 585 GRAS_DBG_ECO_CNTL = 0x00000880, 586 SP_CHICKEN_BITS = 0x00001430, 587 
UCHE_CLIENT_PF = 0x00000004, 588 PC_MODE_CNTL = 0x1f, 589 SP_DBG_ECO_CNTL = 0x0, 590 RB_DBG_ECO_CNTL = 0x04100000, 591 RB_DBG_ECO_CNTL_blit = 0x05100000, 592 HLSQ_DBG_ECO_CNTL = 0x00080000, 593 RB_UNKNOWN_8E01 = 0x00000001, 594 VPC_DBG_ECO_CNTL = 0x0, 595 UCHE_UNKNOWN_0E12 = 0x10000001 596 ) 597 )) 598 599add_gpus([ 600 GPUId(640), 601 ], A6xxGPUInfo( 602 CHIP.A6XX, 603 [a6xx_base, a6xx_gen2], 604 num_ccu = 2, 605 tile_align_w = 32, 606 tile_align_h = 16, 607 num_vsc_pipes = 32, 608 cs_shared_mem_size = 32 * 1024, 609 wave_granularity = 2, 610 fibers_per_sp = 128 * 4 * 16, 611 highest_bank_bit = 15, 612 macrotile_mode = 0, 613 magic_regs = dict( 614 PC_POWER_CNTL = 1, 615 TPL1_DBG_ECO_CNTL = 0x00008000, 616 GRAS_DBG_ECO_CNTL = 0x0, 617 SP_CHICKEN_BITS = 0x00000420, 618 UCHE_CLIENT_PF = 0x00000004, 619 PC_MODE_CNTL = 0x1f, 620 SP_DBG_ECO_CNTL = 0x0, 621 RB_DBG_ECO_CNTL = 0x04100000, 622 RB_DBG_ECO_CNTL_blit = 0x04100000, 623 HLSQ_DBG_ECO_CNTL = 0x0, 624 RB_UNKNOWN_8E01 = 0x00000001, 625 VPC_DBG_ECO_CNTL = 0x02000000, 626 UCHE_UNKNOWN_0E12 = 0x00000001 627 ) 628 )) 629 630add_gpus([ 631 GPUId(680), 632 ], A6xxGPUInfo( 633 CHIP.A6XX, 634 [a6xx_base, a6xx_gen2], 635 num_ccu = 4, 636 tile_align_w = 64, 637 tile_align_h = 32, 638 num_vsc_pipes = 32, 639 cs_shared_mem_size = 32 * 1024, 640 wave_granularity = 2, 641 fibers_per_sp = 128 * 4 * 16, 642 highest_bank_bit = 15, 643 macrotile_mode = 0, 644 magic_regs = dict( 645 PC_POWER_CNTL = 3, 646 TPL1_DBG_ECO_CNTL = 0x00108000, 647 GRAS_DBG_ECO_CNTL = 0x0, 648 SP_CHICKEN_BITS = 0x00001430, 649 UCHE_CLIENT_PF = 0x00000004, 650 PC_MODE_CNTL = 0x1f, 651 SP_DBG_ECO_CNTL = 0x0, 652 RB_DBG_ECO_CNTL = 0x04100000, 653 RB_DBG_ECO_CNTL_blit = 0x04100000, 654 HLSQ_DBG_ECO_CNTL = 0x0, 655 RB_UNKNOWN_8E01 = 0x00000001, 656 VPC_DBG_ECO_CNTL = 0x02000000, 657 UCHE_UNKNOWN_0E12 = 0x00000001 658 ) 659 )) 660 661add_gpus([ 662 GPUId(650), 663 ], A6xxGPUInfo( 664 CHIP.A6XX, 665 [a6xx_base, a6xx_gen3], 666 num_ccu = 3, 667 tile_align_w = 96, 
668 tile_align_h = 16, 669 num_vsc_pipes = 32, 670 cs_shared_mem_size = 32 * 1024, 671 wave_granularity = 2, 672 fibers_per_sp = 128 * 2 * 16, 673 highest_bank_bit = 16, 674 magic_regs = dict( 675 PC_POWER_CNTL = 2, 676 # this seems to be a chicken bit that fixes cubic filtering: 677 TPL1_DBG_ECO_CNTL = 0x01008000, 678 GRAS_DBG_ECO_CNTL = 0x0, 679 SP_CHICKEN_BITS = 0x00001400, 680 UCHE_CLIENT_PF = 0x00000004, 681 PC_MODE_CNTL = 0x1f, 682 SP_DBG_ECO_CNTL = 0x01000000, 683 RB_DBG_ECO_CNTL = 0x04100000, 684 RB_DBG_ECO_CNTL_blit = 0x04100000, 685 HLSQ_DBG_ECO_CNTL = 0x0, 686 RB_UNKNOWN_8E01 = 0x0, 687 VPC_DBG_ECO_CNTL = 0x02000000, 688 UCHE_UNKNOWN_0E12 = 0x00000001 689 ) 690 )) 691 692add_gpus([ 693 # These are all speedbins/variants of A635 694 GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"), 695 GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"), 696 GPUId(chip_id=0x006006030500, name="Adreno 7c+ Gen 3 Lite"), 697 GPUId(chip_id=0x00ac06030500, name="FD643"), # e.g. QCM6490, Fairphone 5 698 # fallback wildcard entry should be last: 699 GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"), 700 ], A6xxGPUInfo( 701 CHIP.A6XX, 702 [a6xx_base, a6xx_gen4], 703 num_ccu = 2, 704 tile_align_w = 32, 705 tile_align_h = 16, 706 num_vsc_pipes = 32, 707 cs_shared_mem_size = 32 * 1024, 708 wave_granularity = 2, 709 fibers_per_sp = 128 * 2 * 16, 710 highest_bank_bit = 14, 711 magic_regs = dict( 712 PC_POWER_CNTL = 1, 713 TPL1_DBG_ECO_CNTL = 0x05008000, 714 GRAS_DBG_ECO_CNTL = 0x0, 715 SP_CHICKEN_BITS = 0x00001400, 716 UCHE_CLIENT_PF = 0x00000084, 717 PC_MODE_CNTL = 0x1f, 718 SP_DBG_ECO_CNTL = 0x00000006, 719 RB_DBG_ECO_CNTL = 0x04100000, 720 RB_DBG_ECO_CNTL_blit = 0x04100000, 721 HLSQ_DBG_ECO_CNTL = 0x0, 722 RB_UNKNOWN_8E01 = 0x0, 723 VPC_DBG_ECO_CNTL = 0x02000000, 724 UCHE_UNKNOWN_0E12 = 0x00000001 725 ) 726 )) 727 728add_gpus([ 729 GPUId(660), 730 ], A6xxGPUInfo( 731 CHIP.A6XX, 732 [a6xx_base, a6xx_gen4], 733 num_ccu = 3, 734 tile_align_w = 96, 735 tile_align_h = 16, 
736 num_vsc_pipes = 32, 737 cs_shared_mem_size = 32 * 1024, 738 wave_granularity = 2, 739 fibers_per_sp = 128 * 2 * 16, 740 highest_bank_bit = 16, 741 magic_regs = dict( 742 PC_POWER_CNTL = 2, 743 TPL1_DBG_ECO_CNTL = 0x05008000, 744 GRAS_DBG_ECO_CNTL = 0x0, 745 SP_CHICKEN_BITS = 0x00001400, 746 UCHE_CLIENT_PF = 0x00000084, 747 PC_MODE_CNTL = 0x1f, 748 SP_DBG_ECO_CNTL = 0x01000000, 749 RB_DBG_ECO_CNTL = 0x04100000, 750 RB_DBG_ECO_CNTL_blit = 0x04100000, 751 HLSQ_DBG_ECO_CNTL = 0x0, 752 RB_UNKNOWN_8E01 = 0x0, 753 VPC_DBG_ECO_CNTL = 0x02000000, 754 UCHE_UNKNOWN_0E12 = 0x00000001 755 ) 756 )) 757 758add_gpus([ 759 GPUId(chip_id=0x6060201, name="FD644"), # Called A662 in kgsl 760 GPUId(chip_id=0xffff06060300, name="FD663"), 761 ], A6xxGPUInfo( 762 CHIP.A6XX, 763 [a6xx_base, a6xx_gen4], 764 num_ccu = 3, 765 tile_align_w = 96, 766 tile_align_h = 16, 767 num_vsc_pipes = 32, 768 cs_shared_mem_size = 32 * 1024, 769 wave_granularity = 2, 770 fibers_per_sp = 128 * 4 * 16, 771 magic_regs = dict( 772 PC_POWER_CNTL = 2, 773 TPL1_DBG_ECO_CNTL = 0x05008000, 774 GRAS_DBG_ECO_CNTL = 0x0, 775 SP_CHICKEN_BITS = 0x00001400, 776 UCHE_CLIENT_PF = 0x00000084, 777 PC_MODE_CNTL = 0x1f, 778 SP_DBG_ECO_CNTL = 0x6, 779 RB_DBG_ECO_CNTL = 0x04100000, 780 RB_DBG_ECO_CNTL_blit = 0x04100000, 781 HLSQ_DBG_ECO_CNTL = 0x0, 782 RB_UNKNOWN_8E01 = 0x0, 783 VPC_DBG_ECO_CNTL = 0x02000000, 784 UCHE_UNKNOWN_0E12 = 0x00000001 785 ) 786 )) 787 788add_gpus([ 789 GPUId(690), 790 GPUId(chip_id=0xffff06090000, name="FD690"), # Default no-speedbin fallback 791 ], A6xxGPUInfo( 792 CHIP.A6XX, 793 [a6xx_base, a6xx_gen4, A6XXProps(broken_ds_ubwc_quirk = True)], 794 num_ccu = 8, 795 tile_align_w = 64, 796 tile_align_h = 32, 797 num_vsc_pipes = 32, 798 cs_shared_mem_size = 32 * 1024, 799 wave_granularity = 2, 800 fibers_per_sp = 128 * 2 * 16, 801 highest_bank_bit = 16, 802 magic_regs = dict( 803 PC_POWER_CNTL = 7, 804 TPL1_DBG_ECO_CNTL = 0x04c00000, 805 GRAS_DBG_ECO_CNTL = 0x0, 806 SP_CHICKEN_BITS = 0x00001400, 807 
UCHE_CLIENT_PF = 0x00000084, 808 PC_MODE_CNTL = 0x1f, 809 SP_DBG_ECO_CNTL = 0x1200000, 810 RB_DBG_ECO_CNTL = 0x100000, 811 RB_DBG_ECO_CNTL_blit = 0x00100000, # ??? 812 HLSQ_DBG_ECO_CNTL = 0x0, 813 RB_UNKNOWN_8E01 = 0x0, 814 VPC_DBG_ECO_CNTL = 0x2000400, 815 UCHE_UNKNOWN_0E12 = 0x00000001 816 ), 817 raw_magic_regs = [ 818 [A6XXRegs.REG_A6XX_SP_UNKNOWN_AAF2, 0x00c00000], 819 ], 820 )) 821 822# Based on a6xx_base + a6xx_gen4 823a7xx_base = A6XXProps( 824 has_gmem_fast_clear = True, 825 has_hw_multiview = True, 826 has_fs_tex_prefetch = True, 827 has_sampler_minmax = True, 828 829 supports_double_threadsize = True, 830 831 sysmem_per_ccu_depth_cache_size = 256 * 1024, 832 sysmem_per_ccu_color_cache_size = 64 * 1024, 833 gmem_ccu_color_cache_fraction = CCUColorCacheFraction.EIGHTH.value, 834 835 prim_alloc_threshold = 0x7, 836 vs_max_inputs_count = 32, 837 max_sets = 8, 838 839 reg_size_vec4 = 96, 840 # Blob limits it to 128 but we hang with 128 841 instr_cache_size = 127, 842 supports_multiview_mask = True, 843 has_z24uint_s8uint = True, 844 tess_use_shared = True, 845 storage_16bit = True, 846 has_tex_filter_cubic = True, 847 has_separate_chroma_filter = True, 848 has_sample_locations = True, 849 has_lpac = True, 850 has_getfiberid = True, 851 has_dp2acc = True, 852 has_dp4acc = True, 853 enable_lrz_fast_clear = True, 854 has_lrz_dir_tracking = True, 855 has_lrz_feedback = True, 856 has_per_view_viewport = True, 857 line_width_min = 1.0, 858 line_width_max = 127.5, 859 has_scalar_alu = True, 860 has_coherent_ubwc_flag_caches = True, 861 has_isam_v = True, 862 has_ssbo_imm_offsets = True, 863 has_early_preamble = True, 864 has_attachment_shading_rate = True, 865 has_ubwc_linear_mipmap_fallback = True, 866 prede_nop_quirk = True, 867 predtf_nop_quirk = True, 868 has_sad = True, 869 ) 870 871a7xx_gen1 = A7XXProps( 872 supports_ibo_ubwc = True, 873 fs_must_have_non_zero_constlen_quirk = True, 874 enable_tp_ubwc_flag_hint = True, 875 
reading_shading_rate_requires_smask_quirk = True, 876 ) 877 878a7xx_gen2 = A7XXProps( 879 stsc_duplication_quirk = True, 880 has_event_write_sample_count = True, 881 ubwc_unorm_snorm_int_compatible = True, 882 supports_ibo_ubwc = True, 883 fs_must_have_non_zero_constlen_quirk = True, 884 # Most devices with a740 have blob v6xx which doesn't have 885 # this hint set. Match them for better compatibility by default. 886 enable_tp_ubwc_flag_hint = False, 887 has_64b_ssbo_atomics = True, 888 has_primitive_shading_rate = True, 889 reading_shading_rate_requires_smask_quirk = True, 890 has_ray_intersection = True, 891 ) 892 893a7xx_gen3 = A7XXProps( 894 has_event_write_sample_count = True, 895 load_inline_uniforms_via_preamble_ldgk = True, 896 load_shader_consts_via_preamble = True, 897 has_gmem_vpc_attr_buf = True, 898 sysmem_vpc_attr_buf_size = 0x20000, 899 gmem_vpc_attr_buf_size = 0xc000, 900 ubwc_unorm_snorm_int_compatible = True, 901 supports_ibo_ubwc = True, 902 has_generic_clear = True, 903 r8g8_faulty_fast_clear_quirk = True, 904 gs_vpc_adjacency_quirk = True, 905 storage_8bit = True, 906 ubwc_all_formats_compatible = True, 907 has_compliant_dp4acc = True, 908 ubwc_coherency_quirk = True, 909 has_persistent_counter = True, 910 has_64b_ssbo_atomics = True, 911 has_primitive_shading_rate = True, 912 has_ray_intersection = True, 913 has_sw_fuse = True, 914 has_rt_workaround = True, 915 has_alias_rt=True, 916 ) 917 918a730_magic_regs = dict( 919 TPL1_DBG_ECO_CNTL = 0x1000000, 920 GRAS_DBG_ECO_CNTL = 0x800, 921 SP_CHICKEN_BITS = 0x1440, 922 UCHE_CLIENT_PF = 0x00000084, 923 PC_MODE_CNTL = 0x0000003f, # 0x00001f1f in some tests 924 SP_DBG_ECO_CNTL = 0x10000000, 925 RB_DBG_ECO_CNTL = 0x00000000, 926 RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed? 
927 RB_UNKNOWN_8E01 = 0x0, 928 VPC_DBG_ECO_CNTL = 0x02000000, 929 UCHE_UNKNOWN_0E12 = 0x3200000, 930 931 RB_UNKNOWN_8E06 = 0x02080000, 932 ) 933 934a730_raw_magic_regs = [ 935 [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00840004], 936 [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724], 937 938 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00002400], 939 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00000000], 940 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000], 941 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000], 942 [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000040], 943 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00008000], 944 [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x20080000], 945 [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21fc7f00], 946 [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00000000], 947 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000], 948 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000], 949 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080], 950 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000], 951 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000], 952 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000], 953 [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000], 954 [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000], 955 956 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840], 957 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62], 958 959 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000], 960 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000], 961 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000], 962 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000], 963 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000], 964 [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000], 965 966 [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000], 967 968 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000], 969 [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000], 970 [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000], 971 ] 972 973a740_magic_regs = dict( 974 # PC_POWER_CNTL = 7, 975 TPL1_DBG_ECO_CNTL = 0x11100000, 976 GRAS_DBG_ECO_CNTL = 0x00004800, 977 
# NOTE(review): the entries down to the closing ")" are the tail of a
# magic-register dict(...) whose opening line lies before this chunk --
# presumably a740_magic_regs, which is referenced further down; confirm.
        SP_CHICKEN_BITS = 0x10001400,
        UCHE_CLIENT_PF = 0x00000084,
        # Blob uses 0x1f or 0x1f1f, however these values cause vertices
        # corruption in some tests.
        PC_MODE_CNTL = 0x0000003f,
        SP_DBG_ECO_CNTL = 0x10000000,
        RB_DBG_ECO_CNTL = 0x00000000,
        RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed?
        # HLSQ_DBG_ECO_CNTL = 0x0,
        RB_UNKNOWN_8E01 = 0x0,
        VPC_DBG_ECO_CNTL = 0x02000000,
        UCHE_UNKNOWN_0E12 = 0x00000000,

        RB_UNKNOWN_8E06 = 0x02080000,
    )

# Shared list of [register, value] pairs.  It is passed as raw_magic_regs by
# the FD740, Adreno X1-85 and FD740v3 entries below; the other a7xx entries in
# this section either use a730_raw_magic_regs or spell out their own list.
a740_raw_magic_regs = [
        [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004],
        [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724],

        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
        [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
        [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
        [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
        [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600],
        [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],

        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],

        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],

        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000],
        [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],

        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
        [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
        [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],

        [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
    ]

# Each add_gpus() call below maps every listed GPUId to the single info
# struct built by the A6xxGPUInfo(...) expression that follows the id list.
# The calls run at import time and their order determines the order of the
# generated tables, so they must not be reordered casually.

# FD725: a7xx gen1 with the a725 cmdbuf-start quirk, reusing the a730
# magic-register tables.
add_gpus([
        # These are named as Adreno730v3 or Adreno725v1.
        GPUId(chip_id=0x07030002, name="FD725"),
        GPUId(chip_id=0xffff07030002, name="FD725"),
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_gen1, A7XXProps(cmdbuf_start_a725_quirk = True)],
        num_ccu = 4,
        tile_align_w = 64,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        highest_bank_bit = 16,
        magic_regs = a730_magic_regs,
        raw_magic_regs = a730_raw_magic_regs,
    ))

# FD730: same parameters as FD725 above, minus the extra quirk props.
add_gpus([
        GPUId(chip_id=0x07030001, name="FD730"), # KGSL, no speedbin data
        GPUId(chip_id=0xffff07030001, name="FD730"), # Default no-speedbin fallback
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_gen1],
        num_ccu = 4,
        tile_align_w = 64,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        highest_bank_bit = 16,
        magic_regs = a730_magic_regs,
        raw_magic_regs = a730_raw_magic_regs,
    ))

# FD735: a7xx gen2 with its own inline magic-register tables.
# NOTE(review): no highest_bank_bit is passed here (unlike FD730/FD740), so
# the constructor default applies -- confirm that is intended.
add_gpus([
        GPUId(chip_id=0x43030B00, name="FD735")
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_gen2, A7XXProps(enable_tp_ubwc_flag_hint = True)],
        num_ccu = 3,
        tile_align_w = 96,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = dict(
            TPL1_DBG_ECO_CNTL = 0x11100000,
            GRAS_DBG_ECO_CNTL = 0x00004800,
            SP_CHICKEN_BITS = 0x10001400,
            UCHE_CLIENT_PF = 0x00000084,
            PC_MODE_CNTL = 0x0000001f,
            SP_DBG_ECO_CNTL = 0x10000000,
            RB_DBG_ECO_CNTL = 0x00000001,
            RB_DBG_ECO_CNTL_blit = 0x00000001, # is it even needed?
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000000,

            RB_UNKNOWN_8E06 = 0x02080000,
        ),
        raw_magic_regs = [
            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000],
            [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00040724],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430800],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600],
            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],
        ],
    ))

# FD740: a7xx gen2 using the shared a740 tables.  GPUId(740) has no explicit
# chip_id or name, so GPUId derives both from the numeric gpu_id.
add_gpus([
        GPUId(740), # Deprecated, used for dev kernels.
        GPUId(chip_id=0x43050a01, name="FD740"), # KGSL, no speedbin data
        GPUId(chip_id=0xffff43050a01, name="FD740"), # Default no-speedbin fallback
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_gen2],
        num_ccu = 6,
        tile_align_w = 96,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        highest_bank_bit = 16,
        magic_regs = a740_magic_regs,
        raw_magic_regs = a740_raw_magic_regs,
    ))

# Adreno X1-85: same parameters as FD740 plus the compute constlen quirk.
add_gpus([
        GPUId(chip_id=0xffff43050c01, name="Adreno X1-85"),
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_gen2, A7XXProps(compute_constlen_quirk = True)],
        num_ccu = 6,
        tile_align_w = 96,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        highest_bank_bit = 16,
        magic_regs = a740_magic_regs,
        raw_magic_regs = a740_raw_magic_regs,
    ))

# FDA32: reuses a740_magic_regs but carries its own raw register list.
# NOTE(review): this entry passes no highest_bank_bit, and its raw list ends
# at RB_UNKNOWN_88F5 (no RB_UNKNOWN_8C34 / GRAS_UNKNOWN_8008 entries, which
# a740_raw_magic_regs has) -- presumably matching the blob dump; confirm.
# Values from blob v676.0
add_gpus([
        GPUId(chip_id=0x43050a00, name="FDA32"), # Adreno A32 (G3x Gen 2)
        GPUId(chip_id=0xffff43050a00, name="FDA32"),
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_gen2, A7XXProps(cmdbuf_start_a725_quirk = True)],
        num_ccu = 6,
        tile_align_w = 96,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = a740_magic_regs,
        raw_magic_regs = [
            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00040004],
            [A6XXRegs.REG_A6XX_TPL1_DBG_ECO_CNTL1, 0x00000700],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000400],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00430820],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x21585600],
            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8E79, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
        ],
    ))

# FD740v3: inline magic_regs dict combined with the shared a740 raw list.
# NOTE(review): no highest_bank_bit is passed here either -- confirm.
add_gpus([
        GPUId(chip_id=0x43050b00, name="FD740v3"), # Quest 3
        GPUId(chip_id=0xffff43050b00, name="FD740v3"),
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_gen2, A7XXProps(enable_tp_ubwc_flag_hint = True)],
        num_ccu = 6,
        tile_align_w = 96,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        magic_regs = dict(
            # PC_POWER_CNTL = 7,
            TPL1_DBG_ECO_CNTL = 0x11100000,
            GRAS_DBG_ECO_CNTL = 0x00004800,
            SP_CHICKEN_BITS = 0x10001400,
            UCHE_CLIENT_PF = 0x00000084,
            # Blob uses 0x1f or 0x1f1f, however these values cause vertices
            # corruption in some tests.
            PC_MODE_CNTL = 0x0000003f,
            SP_DBG_ECO_CNTL = 0x10000000,
            RB_DBG_ECO_CNTL = 0x00000001,
            RB_DBG_ECO_CNTL_blit = 0x00000000, # is it even needed?
            # HLSQ_DBG_ECO_CNTL = 0x0,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x00000000,

            RB_UNKNOWN_8E06 = 0x02080000,
        ),
        raw_magic_regs = a740_raw_magic_regs,
    ))

# FD750: a7xx gen3 with fully inline tables.  The raw list has no
# RB_UNKNOWN_8E79 entry and ends with two registers given as bare offsets
# (0x930a, 0x960a) rather than A6XXRegs names.
add_gpus([
        GPUId(chip_id=0x43051401, name="FD750"), # KGSL, no speedbin data
        GPUId(chip_id=0xffff43051401, name="FD750"), # Default no-speedbin fallback
    ], A6xxGPUInfo(
        CHIP.A7XX,
        [a7xx_base, a7xx_gen3],
        num_ccu = 6,
        tile_align_w = 96,
        tile_align_h = 32,
        num_vsc_pipes = 32,
        cs_shared_mem_size = 32 * 1024,
        wave_granularity = 2,
        fibers_per_sp = 128 * 2 * 16,
        highest_bank_bit = 16,
        magic_regs = dict(
            TPL1_DBG_ECO_CNTL = 0x11100000,
            GRAS_DBG_ECO_CNTL = 0x00004800,
            SP_CHICKEN_BITS = 0x10000400,
            PC_MODE_CNTL = 0x00003f1f,
            SP_DBG_ECO_CNTL = 0x10000000,
            RB_DBG_ECO_CNTL = 0x00000001,
            RB_DBG_ECO_CNTL_blit = 0x00000001,
            RB_UNKNOWN_8E01 = 0x0,
            VPC_DBG_ECO_CNTL = 0x02000000,
            UCHE_UNKNOWN_0E12 = 0x40000000,

            RB_UNKNOWN_8E06 = 0x02082000,
        ),
        raw_magic_regs = [
            [A6XXRegs.REG_A6XX_UCHE_CACHE_WAYS, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E10, 0x00000000],
            [A6XXRegs.REG_A7XX_UCHE_UNKNOWN_0E11, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE08, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE09, 0x00431800],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE0A, 0x00800000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6C, 0x00000000],
            [A6XXRegs.REG_A6XX_PC_DBG_ECO_CNTL, 0x00100000],
            [A6XXRegs.REG_A7XX_PC_UNKNOWN_9E24, 0x01585600],
            [A6XXRegs.REG_A7XX_VFD_UNKNOWN_A600, 0x00008000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE06, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6A, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE6B, 0x00000080],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AE73, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB02, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB01, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_AB22, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_B310, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8120, 0x09510840],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8121, 0x00000a62],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8009, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800A, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800B, 0x00000000],
            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_800C, 0x00000000],

            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE2+1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE4+1, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6, 0x00000000],
            [A6XXRegs.REG_A7XX_SP_UNKNOWN_0CE6+1, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_80A7, 0x00000000],

            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8899, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_88F5, 0x00000000],
            [A6XXRegs.REG_A7XX_RB_UNKNOWN_8C34, 0x00000000],

            [A6XXRegs.REG_A7XX_GRAS_UNKNOWN_8008, 0x00000000],

            [0x930a, 0],
            [0x960a, 1],
            [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS_CONTROL, 0],
            [A6XXRegs.REG_A7XX_SP_PS_ALIASED_COMPONENTS, 0],
        ],
    ))

# Mako template for the generated C source.  It emits:
#   * one "static const struct fd_dev_info __info<N>" per entry of
#     s.gpu_infos, initialised from str(info), with <N> taken from
#     s.info_index(info);
#   * the fd_dev_recs[] table, one row per (id, info) item of s.gpus;
#   * fd_dev_info_apply_dbg_options(), which reads the FD_DEV_FEATURES option
#     as ':'-separated "name=value" features and, for every (prop, gen) key of
#     unique_props, overrides info-><gen>.<prop>, choosing the bool or num
#     parser by the Python type of the property's value.  An unknown feature
#     name logs an error and calls exit(1).
# The True/False #defines let Python bool reprs appear verbatim in the C
# initialisers.
# NOTE(review): the generated C calls strdup/strtok_r/strcmp but directly
# includes only <stdlib.h>; presumably <string.h> arrives through the project
# headers -- confirm.
template = """\
/* Copyright © 2021 Google, Inc.
 *
 * SPDX-License-Identifier: MIT
 */

#include "freedreno_dev_info.h"
#include "util/u_debug.h"
#include "util/log.h"

#include <stdlib.h>

/* Map python to C: */
#define True true
#define False false

%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor

static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};

void
fd_dev_info_apply_dbg_options(struct fd_dev_info *info)
{
    const char *env = debug_get_option("FD_DEV_FEATURES", NULL);
    if (!env || !*env)
        return;

    char *features = strdup(env);
    char *feature, *feature_end;
    feature = strtok_r(features, ":", &feature_end);
    while (feature != NULL) {
        char *name, *name_end;
        name = strtok_r(feature, "=", &name_end);

        if (!name) {
            mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", feature);
            exit(1);
        }

        char *value = strtok_r(NULL, "=", &name_end);

        feature = strtok_r(NULL, ":", &feature_end);

%for (prop, gen), val in unique_props.items():
  <%
    if isinstance(val, bool):
        parse_value = "debug_parse_bool_option"
    else:
        parse_value = "debug_parse_num_option"
  %>
        if (strcmp(name, "${prop}") == 0) {
            info->${gen}.${prop} = ${parse_value}(value, info->${gen}.${prop});
            continue;
        }
%endfor

        mesa_loge("Invalid feature \\"%s\\" in FD_DEV_FEATURES", name);
        exit(1);
    }

    free(features);
}
"""

# Render the template against the populated device state and the property
# table, and write the resulting C source to stdout.
print(Template(template).render(s=s, unique_props=A6XXProps.unique_props))