• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1-- Parse cmdstream dump and analyse blits and batches
2
3--local posix = require "posix"
4
-- C-style formatted print: expands `fmt` with string.format() and hands the
-- resulting text to io.write().  Returns whatever io.write() returns.
function printf(fmt, ...)
	local text = string.format(fmt, ...)
	return io.write(text)
end
8
-- Debug trace hook taking the same arguments as printf().  Tracing is
-- currently switched off, so this does nothing and returns nothing;
-- uncomment the call below to get verbose output.
function dbg(fmt, ...)
	-- printf(fmt, ...)
end
12
-- Announce start-up before any cmdstream is parsed:
printf("Analyzing Data...\n")

-- Register accessor for the "a630" target.  The handlers below read the
-- current register state through it (e.g. r.RB_BLIT_INFO.CLEAR_MASK):
local r = rnn.init("a630")
16
-- Render targets (MRTs), keyed by base address.  Within a submit every
-- draw targets the same set of MRTs, so `mrts` holds the current set:
local mrts = {}
-- Every render target seen so far, including those from earlier batches
-- (reset() replaces `mrts` but leaves this table alone):
local allmrts = {}

-- Record a render target in both the current and the historical table.
--   fmt, w, h, samples: format, dimensions and sample count of the target
--   base: buffer address, used as the table key
--   flag: flag-buffer address
--   gmem: GMEM base of the target (2D blits pass -1)
function push_mrt(fmt, w, h, samples, base, flag, gmem)
	dbg("MRT: %s %ux%u 0x%x\n", fmt, w, h, base)

	local target = {
		format = fmt,
		w = w,
		h = h,
		samples = samples,
		base = base,
		flag = flag,
		gmem = gmem,
	}

	mrts[base] = target
	allmrts[base] = target
end
35
-- Each draw reads from some number of sources/textures; they are kept
-- here keyed by base address:
local sources = {}

-- Record a source (texture) for the current batch.
--   fmt, w, h, samples: format, dimensions and sample count of the source
--   base: buffer address, used as the table key
--   flag: flag-buffer address
function push_source(fmt, w, h, samples, base, flag)
	dbg("SRC: %s %ux%u 0x%x\n", fmt, w, h, base)

	local src = {
		format = fmt,
		w = w,
		h = h,
		samples = samples,
		base = base,
		flag = flag,
	}

	sources[base] = src
end
51
-- Per-batch state shared by the handlers below.  reset() re-initialises
-- the counters, tables and depth/stencil flags; the bin geometry is only
-- filled in by draw() for GMEM passes.
local binw, binh, nbins            -- bin width/height and number of bins
local blits, draws = 0, 0          -- blits/draws counted in the current batch
local drawmode                     -- mode of the current batch (nil if none yet)
local cleared, restored, resolved  -- sets keyed by GMEM base or buffer address
local nullbatch                    -- true until the submit sees a draw or blit
local depthtest, depthwrite
local stenciltest, stencilwrite
66
-- Called at the start of a cmdstream; just reports which one is being parsed.
--   name: name of the cmdstream, printed verbatim
function start_cmdstream(name)
	local banner = "Parsing %s\n"
	printf(banner, name)
end
70
-- Drop all per-batch state so that the next draw/blit starts a fresh batch.
-- allmrts, nullbatch and the bin geometry are not touched here.
function reset()
	dbg("reset\n")
	mrts = {}
	sources = {}
	draws = 0
	blits = 0
	cleared = {}
	restored = {}
	resolved = {}
	depthtest = false
	depthwrite = false
	stenciltest = false
	stencilwrite = false
	-- Use the `nil` keyword.  `Nil` is an ordinary (undefined) global
	-- variable and only evaluates to nil as long as nobody defines it.
	drawmode = nil
end
86
-- Called at the start of each submit: clear the per-batch state and assume
-- the submit is a "null batch" until a draw or blit event proves otherwise.
function start_submit()
	dbg("start_submit\n")

	reset()

	nullbatch = true
end
92
-- Print a summary of the batch (or blit) accumulated so far, then reset the
-- per-batch state.  If nothing was drawn or blitted, only "NULL BATCH!" is
-- reported (when the submit saw no draw/blit events at all) and the state
-- is left as it is.
function finish()
	dbg("finish\n")

	printf("\n")

	-- TODO we get false-positives for 'NULL BATCH!' because we don't have
	-- a really good way to differentiate between submits and cmds.  Ie.
	-- with growable cmdstream, and a large # of tiles, IB1 can get split
	-- across multiple buffers.  Since we ignore GMEM draws for window-
	-- offset != 0,0, the later cmds will appear as null batches
	if draws == 0 and blits == 0 then
		if nullbatch then
			printf("NULL BATCH!\n")
		end
		return
	end

	if draws > 0 then
		printf("Batch:\n")
		printf("-------\n")
		printf("  # of draws: %u\n", draws)
		printf("  mode: %s\n", drawmode)
		if drawmode == "RM6_GMEM" then
			printf("  bin size: %ux%u (%u bins)\n", binw, binh, nbins)
		end
		if depthtest or depthwrite then
			printf("  ")
			if depthtest then
				printf("DEPTHTEST ")
			end
			if depthwrite then
				printf("DEPTHWRITE")
			end
			printf("\n")
		end
		if stenciltest or stencilwrite then
			printf("  ")
			if stenciltest then
				printf("STENCILTEST ")
			end
			if stencilwrite then
				printf("STENCILWRITE")
			end
			printf("\n")
		end
	else
		printf("Blit:\n")
		printf("-----\n")
	end

	for base, mrt in pairs(mrts) do
		printf("  MRT[0x%x:0x%x]:\t%ux%u\t\t%s (%s)", base, mrt.flag, mrt.w, mrt.h, mrt.format, mrt.samples)
		if drawmode == "RM6_GMEM" then
			-- In GMEM mode the clear/restore/resolve sets are keyed by
			-- the GMEM base of the target:
			if cleared[mrt.gmem] then
				printf("\tCLEARED")
			end
			if restored[mrt.gmem] then
				printf("\tRESTORED")
			end
			if resolved[mrt.gmem] then
				printf("\tRESOLVED")
			end
		else
			-- Otherwise clears are tracked by the buffer address:
			if cleared[mrt.base] then
				printf("\tCLEARED")
			end
		end
		printf("\n")
	end

	-- Declared local so that calling finish() does not (re)define a
	-- global function as a side effect.
	local function print_source(source)
		printf("  SRC[0x%x:0x%x]:\t%ux%u\t\t%s (%s)\n", source.base, source.flag, source.w, source.h, source.format, source.samples)
	end

	for base, source in pairs(sources) do
		-- only show sources that have been previously rendered to, other
		-- textures are less interesting.  Possibly this should be an
		-- option somehow.  Batches with fewer than 10 draws (which
		-- includes pure blits, where draws == 0) list every source.
		if draws < 10
				or allmrts[base]
				or (source.flag and allmrts[source.flag]) then
			print_source(source)
		end
	end
	reset()
end
181
-- Called at the end of each submit: report whatever batch is still pending.
function end_submit()
	dbg("end_submit\n")

	finish()
end
186
-- Most recent mode marker seen in the cmdstream:
local mode = ""

-- CP_SET_MARKER handler: remember the MARKER field of pkt[0] as the
-- current mode.  `size` is not used.
function CP_SET_MARKER(pkt, size)
	local marker = pkt[0].MARKER
	mode = marker
	dbg("mode: %s\n", mode)
end
193
-- CP_EVENT_WRITE handler.  Only BLIT events are of interest: in GMEM mode
-- they mark a clear or restore of a render target, in RESOLVE mode a
-- resolve.  Any other mode is reported as unexpected.  `size` is not used.
function CP_EVENT_WRITE(pkt, size)
	if tostring(pkt[0].EVENT) ~= "BLIT" then
		return
	end
	nullbatch = false

	local curmode = tostring(mode)
	if curmode == "RM6_RESOLVE" then
		resolved[r.RB_BLIT_BASE_GMEM] = 1
		return
	elseif curmode ~= "RM6_GMEM" then
		printf("I am confused!!!\n")
		return
	end

	-- GMEM mode from here on.
	local gmem_base = r.RB_BLIT_BASE_GMEM

	-- either clear or restore:
	if r.RB_BLIT_INFO.CLEAR_MASK == 0 then
		restored[gmem_base] = 1
	else
		cleared[gmem_base] = 1
	end

	-- push_mrt() because we could have GMEM
	-- passes with only a clear and no draws.
	-- Try to match up the GMEM addr with the MRT/DEPTH state,
	-- to avoid relying on RB_BLIT_DST also getting written:
	local sysmem, flag = 0, 0
	for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
		if r.RB_MRT[n].BASE_GMEM == gmem_base then
			sysmem = r.RB_MRT[n].BASE_LO | (r.RB_MRT[n].BASE_HI << 32)
			flag = r.RB_MRT_FLAG_BUFFER[n].ADDR_LO | (r.RB_MRT_FLAG_BUFFER[n].ADDR_HI << 32)
			break
		end
	end

	-- No colour target matched: see whether it is the depth buffer.
	if sysmem == 0 and gmem_base == r.RB_DEPTH_BUFFER_BASE_GMEM then
		sysmem = r.RB_DEPTH_BUFFER_BASE_LO | (r.RB_DEPTH_BUFFER_BASE_HI << 32)
		flag = r.RB_DEPTH_FLAG_BUFFER_BASE_LO | (r.RB_DEPTH_FLAG_BUFFER_BASE_HI << 32)
	end

	--NOTE falling back to RB_BLIT_DST can get confused by previous blits,
	--so it stays disabled:
	--if sysmem == 0 then
	--	sysmem = r.RB_BLIT_DST_LO | (r.RB_BLIT_DST_HI << 32)
	--	flag = r.RB_BLIT_FLAG_DST_LO | (r.RB_BLIT_FLAG_DST_HI << 32)
	--end

	if not r.RB_BLIT_DST_INFO.FLAGS then
		flag = 0
	end

	-- TODO maybe just emit RB_BLIT_DST_LO/HI for clears.. otherwise
	-- we get confused by stale values in registers.. not sure
	-- if this is a problem w/ blob
	local width = r.RB_BLIT_SCISSOR_BR.X + 1
	local height = r.RB_BLIT_SCISSOR_BR.Y + 1
	push_mrt(r.RB_BLIT_DST_INFO.COLOR_FORMAT, width, height,
		r.RB_BLIT_DST_INFO.SAMPLES, sysmem, flag, gmem_base)
end
250
-- A6XX_TEX_CONST handler: record the described texture as a source of the
-- current batch.  The 64-bit base and flag addresses are assembled from
-- their LO/HI halves.  `size` is not used.
function A6XX_TEX_CONST(pkt, size)
	local base = pkt[4].BASE_LO | (pkt[5].BASE_HI << 32)
	local flag = pkt[7].FLAG_LO | (pkt[8].FLAG_HI << 32)
	local width, height = pkt[1].WIDTH, pkt[1].HEIGHT

	push_source(pkt[0].FMT, width, height, pkt[0].SAMPLES, base, flag)
end
258
-- Handle a 2D blit: report it as a batch of its own, with the blit
-- destination as the only MRT and (unless it is a solid-colour fill) the
-- blit source as the only source.  Pending draws are flushed first.
function handle_blit()
	-- blob sometimes uses CP_BLIT for resolves, so filter those out:
	-- TODO it would be nice to not hard-code GMEM addr:
	-- TODO I guess the src can be an offset from GMEM addr..
	local is_resolve = r.SP_PS_2D_SRC_LO == 0x100000
		and not r.RB_2D_BLIT_CNTL.SOLID_COLOR
	if is_resolve then
		resolved[0] = 1
		return
	end

	if draws > 0 then
		finish()
	end
	reset()
	drawmode = "BLIT"

	-- This kinda assumes that we are doing full img blits, which is maybe
	-- not completely legit.  We could perhaps instead just track pitch and
	-- size/pitch??  Or maybe the size doesn't matter much
	local dst = r.RB_2D_DST_LO | (r.RB_2D_DST_HI << 32)
	local dst_flags = r.RB_2D_DST_FLAGS_LO | (r.RB_2D_DST_FLAGS_HI << 32)
	push_mrt(r.RB_2D_DST_INFO.COLOR_FORMAT,
		r.GRAS_2D_DST_BR.X + 1,
		r.GRAS_2D_DST_BR.Y + 1,
		"MSAA_ONE",
		dst,
		dst_flags,
		-1)

	if r.RB_2D_BLIT_CNTL.SOLID_COLOR then
		-- solid-colour blit: counts as a clear of the destination
		dbg("CLEAR=%x\n", dst)
		cleared[dst] = 1
	else
		local src = r.SP_PS_2D_SRC_LO | (r.SP_PS_2D_SRC_HI << 32)
		local src_flags = r.SP_PS_2D_SRC_FLAGS_LO | (r.SP_PS_2D_SRC_FLAGS_HI << 32)
		push_source(r.SP_2D_SRC_FORMAT.COLOR_FORMAT,
			r.GRAS_2D_SRC_BR_X.X + 1,
			r.GRAS_2D_SRC_BR_Y.Y + 1,
			"MSAA_ONE",
			src,
			src_flags)
	end

	blits = blits + 1
	finish()
end
296
-- Tell whether changing from `curmode` to `newmode` stays within the same
-- render pass.  Only BINNING -> GMEM and GMEM -> RESOLVE do; every other
-- combination yields false.
function valid_transition(curmode, newmode)
	local binning_to_gmem = curmode == "RM6_BINNING" and newmode == "RM6_GMEM"
	local gmem_to_resolve = curmode == "RM6_GMEM" and newmode == "RM6_RESOLVE"
	return binning_to_gmem or gmem_to_resolve
end
306
-- Per-draw callback: account for one draw in the current batch.
--   primtype: primitive type of the draw; "BLIT_OP_SCALE" is routed to
--             handle_blit() and "EVENT:BLIT" is ignored here
--   nindx:    not used
-- A change of mode that is not a valid_transition() closes the current
-- batch and starts a new one.  Only GMEM and BYPASS draws are counted, and
-- GMEM draws only for the tile at window offset 0,0.
function draw(primtype, nindx)
	dbg("draw: %s (%s)\n", primtype, mode)
	nullbatch = false
	if primtype == "BLIT_OP_SCALE" then
		handle_blit()
		return
	elseif primtype == "EVENT:BLIT" then
		-- presumably already covered by CP_EVENT_WRITE(); nothing to
		-- count here
		return
	end

	local m = tostring(mode)

	-- detect changes in drawmode which indicate a different
	-- pass..  BINNING->GMEM means same pass, but other
	-- transitions mean different pass:
	if drawmode and m ~= drawmode then
		dbg("%s -> %s transition\n", drawmode, m)
		if not valid_transition(drawmode, m) then
			dbg("invalid transition, new render pass!\n")
			finish()
			-- finish() returns early without resetting when the
			-- batch is empty, so reset explicitly:
			reset()
		end
	end

	-- Binning draws only establish the mode; they are not counted:
	if m == "RM6_BINNING" then
		drawmode = m
		return
	end

	-- Anything else that is not GMEM or BYPASS is unexpected.  (There is
	-- no special case for "EVENT:BLIT" in RESOLVE mode here because that
	-- primtype has already returned above.)
	if m ~= "RM6_GMEM" and m ~= "RM6_BYPASS" then
		printf("unknown MODE %s for primtype %s\n", m, primtype)
		return
	end

	-- Only count the first tile for GMEM mode to avoid counting
	-- each draw for each tile
	if m == "RM6_GMEM" then
		if r.RB_WINDOW_OFFSET.X ~= 0 or r.RB_WINDOW_OFFSET.Y ~= 0 then
			return
		end
	end

	drawmode = m

	-- Per-target component masks, indexed like RB_MRT[] (from 0):
	local render_components = {
		[0] = r.RB_RENDER_COMPONENTS.RT0,
		[1] = r.RB_RENDER_COMPONENTS.RT1,
		[2] = r.RB_RENDER_COMPONENTS.RT2,
		[3] = r.RB_RENDER_COMPONENTS.RT3,
		[4] = r.RB_RENDER_COMPONENTS.RT4,
		[5] = r.RB_RENDER_COMPONENTS.RT5,
		[6] = r.RB_RENDER_COMPONENTS.RT6,
		[7] = r.RB_RENDER_COMPONENTS.RT7,
	}

	-- Record every colour target that has at least one component enabled:
	for n = 0, r.RB_FS_OUTPUT_CNTL1.MRT - 1 do
		if render_components[n] ~= 0 then
			push_mrt(r.RB_MRT[n].BUF_INFO.COLOR_FORMAT,
				r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
				r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
				r.RB_MSAA_CNTL.SAMPLES,
				r.RB_MRT[n].BASE_LO | (r.RB_MRT[n].BASE_HI << 32),
				r.RB_MRT_FLAG_BUFFER[n].ADDR_LO | (r.RB_MRT_FLAG_BUFFER[n].ADDR_HI << 32),
				r.RB_MRT[n].BASE_GMEM)
		end
	end

	-- The depth buffer is recorded as one more render target when it has
	-- a non-zero address:
	local depthbase = r.RB_DEPTH_BUFFER_BASE_LO |
			(r.RB_DEPTH_BUFFER_BASE_HI << 32)

	if depthbase ~= 0 then
		push_mrt(r.RB_DEPTH_BUFFER_INFO.DEPTH_FORMAT,
			r.GRAS_SC_SCREEN_SCISSOR[0].BR.X + 1,
			r.GRAS_SC_SCREEN_SCISSOR[0].BR.Y + 1,
			r.RB_MSAA_CNTL.SAMPLES,
			depthbase,
			r.RB_DEPTH_FLAG_BUFFER_BASE_LO | (r.RB_DEPTH_FLAG_BUFFER_BASE_HI << 32),
			r.RB_DEPTH_BUFFER_BASE_GMEM)
	end

	-- The depth/stencil flags are sticky: once any draw of the batch
	-- sets one it stays set until reset().
	if r.RB_DEPTH_CNTL.Z_WRITE_ENABLE then
		depthwrite = true
	end

	if r.RB_DEPTH_CNTL.Z_ENABLE then
		depthtest = true
	end

	-- clearly 0 != false.. :-/
	-- (WRMASK is a number, and 0 is truthy in Lua, so compare explicitly)
	if r.RB_STENCILWRMASK.WRMASK ~= 0 then
		stencilwrite = true
	end

	if r.RB_STENCIL_CONTROL.STENCIL_ENABLE then
		stenciltest = true
	end

	-- TODO should also check for stencil buffer for z32+s8 case

	if m == "RM6_GMEM" then
		binw = r.VSC_BIN_SIZE.WIDTH
		binh = r.VSC_BIN_SIZE.HEIGHT
		nbins = r.VSC_BIN_COUNT.NX * r.VSC_BIN_COUNT.NY
	end

	draws = draws + 1
end
413
414