-- Parse cmdstream dump and analyse blits and batches

--local posix = require "posix"

-- Formatted write to stdout: string.format() followed by io.write().
-- Returns whatever io.write() returns.
function printf(fmt, ...)
  return io.write(string.format(fmt, ...))
end

-- Debug trace hook with the same signature as printf().  It is a no-op
-- unless the line in its body is uncommented.
function dbg(fmt, ...)
  --printf(fmt, ...)
end

printf("Analyzing Data...\n")

-- Register-state accessor.  NOTE(review): `rnn` is not defined in this
-- file; it is assumed to be a global provided by whatever loads this
-- script.
local r = rnn.init("a630")

-- Combine the LO/HI 32-bit halves of an address into a single integer.
local function addr64(lo, hi)
  return lo | (hi << 32)
end

-- Each submit, all draws will target the same N MRTs:
local mrts = {}
local allmrts = {}  -- includes historical render targets

-- Record a render target for the current batch, keyed by `base`.
-- The same entry is also stored in allmrts, which reset() never clears.
--   fmt, w, h, samples: format, width, height and sample count to report
--   base:  address used as the table key
--   flag:  flag-buffer address printed next to the base
--   gmem:  GMEM base; finish() uses it to look up the cleared/restored/
--          resolved state when the draw mode is "RM6_GMEM"
function push_mrt(fmt, w, h, samples, base, flag, gmem)
  dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base)

  local mrt = {
    format = fmt,
    w = w,
    h = h,
    samples = samples,
    base = base,
    flag = flag,
    gmem = gmem,
  }

  mrts[base] = mrt
  allmrts[base] = mrt
end

-- And each draw will read from M sources/textures:
local sources = {}

-- Record a source/texture for the current batch, keyed by `base`.
-- Parameters have the same meaning as the matching ones of push_mrt().
function push_source(fmt, w, h, samples, base, flag)
  dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base)

  sources[base] = {
    format = fmt,
    w = w,
    h = h,
    samples = samples,
    base = base,
    flag = flag,
  }
end

-- Per-batch state.  Everything that reset() re-initialises also gets a
-- valid initial value here, so that a packet or draw seen before the
-- first start_submit() does not index a nil table.
local binw
local binh
local nbins
local blits = 0
local draws = 0
local drawmode
local cleared = {}
local restored = {}
local resolved = {}
local nullbatch = false
local depthtest = false
local depthwrite = false
local stenciltest = false
local stencilwrite = false

-- Announce the name of the cmdstream being parsed.
function start_cmdstream(name)
  printf("Parsing %s\n", name)
end

-- Drop all per-batch state (targets, sources, counters, clear/restore/
-- resolve tracking, depth/stencil flags and the draw mode).  allmrts,
-- nullbatch and the bin size values are left untouched.
function reset()
  dbg("reset\n")
  mrts = {}
  sources = {}
  draws = 0
  blits = 0
  cleared = {}
  restored = {}
  resolved = {}
  depthtest = false
  depthwrite = false
  stenciltest = false
  stencilwrite = false
  drawmode = nil
end

-- Begin a new submit: clear the batch state and assume the batch is
-- empty until a draw or BLIT event proves otherwise.
function start_submit()
  dbg("start_submit\n")
  reset()
  nullbatch = true
end

-- Print one recorded source entry.  Kept as a global function (the
-- original created this global from inside finish() on every call).
function print_source(source)
  printf(" SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples)
end

-- Print the summary of the current batch or blit and then reset().
-- When nothing was recorded (no draws and no blits) only the optional
-- "NULL BATCH!" notice is printed and the state is NOT reset.
function finish()
  dbg("finish\n")

  printf("\n")

  -- TODO we get false-positives for 'NULL BATCH!' because we don't have
  -- a really good way to differentiate between submits and cmds.  Ie.
  -- with growable cmdstream, and a large # of tiles, IB1 can get split
  -- across multiple buffers.  Since we ignore GMEM draws for window-
  -- offset != 0,0, the later cmds will appear as null batches
  if draws == 0 and blits == 0 then
    if nullbatch then
      printf("NULL BATCH!\n")
    end
    return
  end

  if draws > 0 then
    printf("Batch:\n")
    printf("-------\n")
    printf(" # of draws: %u\n", draws)
    printf(" mode: %s\n", drawmode)
    if drawmode == "RM6_GMEM" then
      printf(" bin size: %ux%u (%u bins)\n", binw, binh, nbins)
    end
    if depthtest or depthwrite then
      printf(" ")
      if depthtest then
        printf("DEPTHTEST ")
      end
      if depthwrite then
        printf("DEPTHWRITE")
      end
      printf("\n")
    end
    if stenciltest or stencilwrite then
      printf(" ")
      if stenciltest then
        printf("STENCILTEST ")
      end
      if stencilwrite then
        printf("STENCILWRITE")
      end
      printf("\n")
    end
  else
    printf("Blit:\n")
    printf("-----\n")
  end

  for base, mrt in pairs(mrts) do
    printf(" MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples)
    if drawmode == "RM6_GMEM" then
      -- in GMEM mode the tracking tables are keyed by GMEM base
      if cleared[mrt.gmem] then
        printf("\tCLEARED")
      end
      if restored[mrt.gmem] then
        printf("\tRESTORED")
      end
      if resolved[mrt.gmem] then
        printf("\tRESOLVED")
      end
    else
      -- otherwise clears are keyed by the target's base address
      if cleared[mrt.base] then
        printf("\tCLEARED")
      end
    end
    printf("\n")
  end

  for base, source in pairs(sources) do
    -- only show sources that have been previously rendered to, other
    -- textures are less interesting.  Possibly this should be an
    -- option somehow
    --
    -- Batches with fewer than 10 draws (which includes pure blits,
    -- where draws == 0) show every source.
    if draws < 10
        or allmrts[base]
        or (source.flag and allmrts[source.flag]) then
      print_source(source)
    end
  end
  reset()
end

-- End of a submit: report whatever was collected.
function end_submit()
  dbg("end_submit\n")
  finish()
end

-- Track the current mode:
local mode = ""

-- Remember the marker carried by the packet as the current mode.
-- NOTE(review): presumably called by the host for each CP_SET_MARKER
-- packet; the caller is not visible in this file.
function CP_SET_MARKER(pkt, size)
  mode = pkt[0].MARKER
  dbg("mode: %s\n", mode)
end

-- Handle a CP_EVENT_WRITE packet.  Only "BLIT" events are of interest:
-- in "RM6_GMEM" mode they are recorded as a clear or a restore of the
-- GMEM buffer (and the matching target is pushed), in "RM6_RESOLVE"
-- mode as a resolve.  Any other mode prints a diagnostic.
function CP_EVENT_WRITE(pkt, size)
  if tostring(pkt[0].EVENT) ~= "BLIT" then
    return
  end
  nullbatch = false
  local m = tostring(mode)
  if m == "RM6_GMEM" then
    -- either clear or restore:
    if r.RB_BLIT_INFO.CLEAR_MASK == 0 then
      restored[r.RB_BLIT_BASE_GMEM] = 1
    else
      cleared[r.RB_BLIT_BASE_GMEM] = 1
    end
    -- push_mrt() because we could have GMEM
    -- passes with only a clear and no draws:
    local flag = 0
    local sysmem = 0
    -- try to match up the GMEM addr with the MRT/DEPTH state,
    -- to avoid relying on RB_BLIT_DST also getting written:
    for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
      if r.RB_MRT[n].BASE_GMEM == r.RB_BLIT_BASE_GMEM then
        sysmem = addr64(r.RB_MRT[n].BASE_LO, r.RB_MRT[n].BASE_HI)
        flag = addr64(r.RB_MRT_FLAG_BUFFER[n].ADDR_LO, r.RB_MRT_FLAG_BUFFER[n].ADDR_HI)
        break
      end
    end
    if sysmem == 0 and r.RB_BLIT_BASE_GMEM == r.RB_DEPTH_BUFFER_BASE_GMEM then
      sysmem = addr64(r.RB_DEPTH_BUFFER_BASE_LO, r.RB_DEPTH_BUFFER_BASE_HI)
      flag = addr64(r.RB_DEPTH_FLAG_BUFFER_BASE_LO, r.RB_DEPTH_FLAG_BUFFER_BASE_HI)
    end
    --NOTE this can get confused by previous blits:
    --if sysmem == 0 then
    --  -- fallback:
    --  sysmem = r.RB_BLIT_DST_LO | (r.RB_BLIT_DST_HI << 32)
    --  flag = r.RB_BLIT_FLAG_DST_LO | (r.RB_BLIT_FLAG_DST_HI << 32)
    --end
    if not r.RB_BLIT_DST_INFO.FLAGS then
      flag = 0
    end
    -- TODO maybe just emit RB_BLIT_DST_LO/HI for clears.. otherwise
    -- we get confused by stale values in registers.. not sure
    -- if this is a problem w/ blob
    push_mrt(r.RB_BLIT_DST_INFO.COLOR_FORMAT,
      r.RB_BLIT_SCISSOR_BR.X + 1,
      r.RB_BLIT_SCISSOR_BR.Y + 1,
      r.RB_BLIT_DST_INFO.SAMPLES,
      sysmem,
      flag,
      r.RB_BLIT_BASE_GMEM)
  elseif m == "RM6_RESOLVE" then
    resolved[r.RB_BLIT_BASE_GMEM] = 1
  else
    printf("I am confused!!!\n")
  end
end

-- Record a texture descriptor as a source of the current batch, using
-- the format, size, sample count, base and flag fields of the packet.
function A6XX_TEX_CONST(pkt, size)
  push_source(pkt[0].FMT,
    pkt[1].WIDTH, pkt[1].HEIGHT,
    pkt[0].SAMPLES,
    addr64(pkt[4].BASE_LO, pkt[5].BASE_HI),
    addr64(pkt[7].FLAG_LO, pkt[8].FLAG_HI))
end

-- Handle a 2D blit.  Any batch in progress is reported first, then the
-- blit is recorded as its own one-target batch and reported at once.
function handle_blit()
  -- blob sometimes uses CP_BLIT for resolves, so filter those out:
  -- TODO it would be nice to not hard-code GMEM addr:
  -- TODO I guess the src can be an offset from GMEM addr..
  if r.SP_PS_2D_SRC_LO == 0x100000 and not r.RB_2D_BLIT_CNTL.SOLID_COLOR then
    resolved[0] = 1
    return
  end
  if draws > 0 then
    finish()
  end
  reset()
  drawmode = "BLIT"

  local dst = addr64(r.RB_2D_DST_LO, r.RB_2D_DST_HI)

  -- This kinda assumes that we are doing full img blits, which is maybe
  -- Not completely legit.  We could perhaps instead just track pitch and
  -- size/pitch??  Or maybe the size doesn't matter much
  push_mrt(r.RB_2D_DST_INFO.COLOR_FORMAT,
    r.GRAS_2D_DST_BR.X + 1,
    r.GRAS_2D_DST_BR.Y + 1,
    "MSAA_ONE",
    dst,
    addr64(r.RB_2D_DST_FLAGS_LO, r.RB_2D_DST_FLAGS_HI),
    -1)
  if r.RB_2D_BLIT_CNTL.SOLID_COLOR then
    -- a solid-color blit is a clear of the destination and has no source
    dbg("CLEAR=%x\n", dst)
    cleared[dst] = 1
  else
    push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT,
      r.GRAS_2D_SRC_BR_X.X + 1,
      r.GRAS_2D_SRC_BR_Y.Y + 1,
      "MSAA_ONE",
      addr64(r.SP_PS_2D_SRC_LO, r.SP_PS_2D_SRC_HI),
      addr64(r.SP_PS_2D_SRC_FLAGS_LO, r.SP_PS_2D_SRC_FLAGS_HI))
  end
  blits = blits + 1
  finish()
end

-- Return true when changing from `curmode` to `newmode` stays within
-- the same render pass (BINNING->GMEM or GMEM->RESOLVE), false otherwise.
function valid_transition(curmode, newmode)
  if curmode == "RM6_BINNING" and newmode == "RM6_GMEM" then
    return true
  end
  if curmode == "RM6_GMEM" and newmode == "RM6_RESOLVE" then
    return true
  end
  return false
end

-- Account for one draw.
--   primtype: primitive type name; "BLIT_OP_SCALE" is routed to
--             handle_blit() and "EVENT:BLIT" is ignored here
--   nindx:    not used by this script
-- For real draws in "RM6_GMEM" or "RM6_BYPASS" mode this records the
-- enabled color targets, the depth buffer, and the depth/stencil flags,
-- then bumps the draw counter.
function draw(primtype, nindx)
  dbg("draw: %s (%s)\n", primtype, mode)
  nullbatch = false
  if primtype == "BLIT_OP_SCALE" then
    handle_blit()
    return
  elseif primtype == "EVENT:BLIT" then
    return
  end

  local m = tostring(mode)

  -- detect changes in drawmode which indicate a different
  -- pass..  BINNING->GMEM means same pass, but other
  -- transitions mean different pass:
  if drawmode and m ~= drawmode then
    dbg("%s -> %s transition\n", drawmode, m)
    if not valid_transition(drawmode, m) then
      dbg("invalid transition, new render pass!\n")
      finish()
      reset()
    end
  end

  if m ~= "RM6_GMEM" and m ~= "RM6_BYPASS" then
    if m == "RM6_BINNING" then
      drawmode = m
      return
    end
    -- "EVENT:BLIT" already returned above, so anything reaching this
    -- point in another mode is unexpected
    printf("unknown MODE %s for primtype %s\n", m, primtype)
    return
  end

  -- Only count the first tile for GMEM mode to avoid counting
  -- each draw for each tile
  if m == "RM6_GMEM" then
    if r.RB_WINDOW_OFFSET.X ~= 0 or r.RB_WINDOW_OFFSET.Y ~= 0 then
      return
    end
  end

  drawmode = m

  -- per-target component masks, indexed from 0 like RB_MRT[]
  local render_components = {
    [0] = r.RB_RENDER_COMPONENTS.RT0,
    [1] = r.RB_RENDER_COMPONENTS.RT1,
    [2] = r.RB_RENDER_COMPONENTS.RT2,
    [3] = r.RB_RENDER_COMPONENTS.RT3,
    [4] = r.RB_RENDER_COMPONENTS.RT4,
    [5] = r.RB_RENDER_COMPONENTS.RT5,
    [6] = r.RB_RENDER_COMPONENTS.RT6,
    [7] = r.RB_RENDER_COMPONENTS.RT7,
  }
  for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
    -- skip targets with no components enabled
    if render_components[n] ~= 0 then
      push_mrt(r.RB_MRT[n].BUF_INFO.COLOR_FORMAT,
        r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
        r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
        r.RB_MSAA_CNTL.SAMPLES,
        addr64(r.RB_MRT[n].BASE_LO, r.RB_MRT[n].BASE_HI),
        addr64(r.RB_MRT_FLAG_BUFFER[n].ADDR_LO, r.RB_MRT_FLAG_BUFFER[n].ADDR_HI),
        r.RB_MRT[n].BASE_GMEM)
    end
  end

  local depthbase = addr64(r.RB_DEPTH_BUFFER_BASE_LO, r.RB_DEPTH_BUFFER_BASE_HI)

  if depthbase ~= 0 then
    push_mrt(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT,
      r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
      r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
      r.RB_MSAA_CNTL.SAMPLES,
      depthbase,
      addr64(r.RB_DEPTH_FLAG_BUFFER_BASE_LO, r.RB_DEPTH_FLAG_BUFFER_BASE_HI),
      r.RB_DEPTH_BUFFER_BASE_GMEM)
  end

  -- the flags below are sticky for the whole batch: once any draw
  -- enables them they stay set until reset()
  if r.RB_DEPTH_CNTL.Z_WRITE_ENABLE then
    depthwrite = true
  end

  if r.RB_DEPTH_CNTL.Z_ENABLE then
    depthtest = true
  end

  -- clearly 0 != false.. :-/
  if r.RB_STENCILWRMASK.WRMASK ~= 0 then
    stencilwrite = true
  end

  if r.RB_STENCIL_CONTROL.STENCIL_ENABLE then
    stenciltest = true
  end

  -- TODO should also check for stencil buffer for z32+s8 case

  if m == "RM6_GMEM" then
    binw = r.VSC_BIN_SIZE.WIDTH
    binh = r.VSC_BIN_SIZE.HEIGHT
    nbins = r.VSC_BIN_COUNT.NX * r.VSC_BIN_COUNT.NY
  end

  draws = draws + 1
end