• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 
25 #include "crashdec.h"
26 
27 
28 static void
dump_mem_pool_reg_write(unsigned reg,uint32_t data,unsigned context,bool pipe)29 dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context,
30                         bool pipe)
31 {
32    /* TODO deal better somehow w/ 64b regs: */
33    struct regacc r = {
34          .rnn = pipe ? rnn_pipe : NULL,
35          .regbase = reg,
36          .value = data,
37    };
38    if (pipe) {
39       struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
40       printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
41 
42       if (!strcmp(info->typeinfo->name, "void")) {
43          /* registers that ignore their payload */
44       } else {
45          printf("\t\t\t");
46          dump_register(&r);
47       }
48       rnn_reginfo_free(info);
49    } else {
50       printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
51       dump_register_val(&r, 2);
52    }
53 }
54 
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
   /* Decode one 128-bit mem pool chunk. A chunk encodes up to two
    * register writes, each with an enable bit, a 32-bit payload, an
    * 18-bit register offset, a pipe flag, and a context bit.
    *
    * NOTE(review): this relies on the compiler packing bitfields
    * LSB-first with no padding between them (GCC/Clang behavior on
    * little-endian targets) — the field order must not be changed.
    */
   struct __attribute__((packed)) {
      bool reg0_enabled : 1;
      bool reg1_enabled : 1;
      uint32_t data0 : 32;
      uint32_t data1 : 32;
      uint32_t reg0 : 18;
      uint32_t reg1 : 18;
      bool reg0_pipe : 1;
      bool reg1_pipe : 1;
      uint32_t reg0_context : 1;
      uint32_t reg1_context : 1;
      uint32_t padding : 22;
   } fields;

   /* Copy (rather than cast) to avoid alignment/aliasing issues. */
   memcpy(&fields, chunk, 4 * sizeof(uint32_t));

   if (fields.reg0_enabled) {
      dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context,
                              fields.reg0_pipe);
   }

   if (fields.reg1_enabled) {
      dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context,
                              fields.reg1_pipe);
   }
}
84 
85 void
dump_cp_mem_pool(uint32_t * mempool)86 dump_cp_mem_pool(uint32_t *mempool)
87 {
88    /* The mem pool is a shared pool of memory used for storing in-flight
89     * register writes. There are 6 different queues, one for each
90     * cluster. Writing to $data (or for some special registers, $addr)
91     * pushes data onto the appropriate queue, and each queue is pulled
92     * from by the appropriate cluster. The queues are thus written to
93     * in-order, but may be read out-of-order.
94     *
95     * The queues are conceptually divided into 128-bit "chunks", and the
96     * read and write pointers are in units of chunks.  These chunks are
97     * organized internally into 8-chunk "blocks", and memory is allocated
98     * dynamically in terms of blocks. Each queue is represented as a
99     * singly-linked list of blocks, as well as 3-bit start/end chunk
100     * pointers that point within the first/last block.  The next pointers
101     * are located in a separate array, rather than inline.
102     */
103 
104    /* TODO: The firmware CP_MEM_POOL save/restore routines do something
105     * like:
106     *
107     * cread $02, [ $00 + 0 ]
108     * and $02, $02, 0x118
109     * ...
110     * brne $02, 0, #label
111     * mov $03, 0x2000
112     * mov $03, 0x1000
113     * label:
114     * ...
115     *
116     * I think that control register 0 is the GPU version, and some
117     * versions have a smaller mem pool. It seems some models have a mem
118     * pool that's half the size, and a bunch of offsets are shifted
119     * accordingly. Unfortunately the kernel driver's dumping code doesn't
120     * seem to take this into account, even the downstream android driver,
121     * and we don't know which versions 0x8, 0x10, or 0x100 correspond
122     * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
123     */
124    bool small_mem_pool = false;
125 
126    /* The array of next pointers for each block. */
127    const uint32_t *next_pointers =
128       small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
129 
130    /* Maximum number of blocks in the pool, also the size of the pointers
131     * array.
132     */
133    const int num_blocks = small_mem_pool ? 0x30 : 0x80;
134 
135    /* Number of queues */
136    const unsigned num_queues = 6;
137 
138    /* Unfortunately the per-queue state is a little more complicated than
139     * a simple pair of begin/end pointers. Instead of a single beginning
140     * block, there are *two*, with the property that either the two are
141     * equal or the second is the "next" of the first. Similarly there are
142     * two end blocks. Thus the queue either looks like this:
143     *
144     * A -> B -> ... -> C -> D
145     *
146     * Or like this, or some combination:
147     *
148     * A/B -> ... -> C/D
149     *
150     * However, there's only one beginning/end chunk offset. Now the
151     * question is, which of A or B is the actual start? I.e. is the chunk
152     * offset an offset inside A or B? It depends. I'll show a typical read
153     * cycle, starting here (read pointer marked with a *) with a chunk
154     * offset of 0:
155     *
156     *	  A                    B
157     *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
158     * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
159     *
160     * Once the pointer advances far enough, the hardware decides to free
161     * A, after which the read-side state looks like:
162     *
163     *	(free)                A/B
164     *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
165     * |_|_|_|_|_|_|_|_|    |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
166     *
167     * Then after advancing the pointer a bit more, the hardware fetches
168     * the "next" pointer for A and stores it in B:
169     *
170     *	(free)                 A                     B
171     *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
172     * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
173     *
174     * Then the read pointer advances into B, at which point we've come
175     * back to the first state having advanced a whole block:
176     *
177     *	(free)                 A                     B
178     *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
179     * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
180     *
181     *
182     * There is a similar cycle for the write pointer. Now, the question
183     * is, how do we know which state we're in? We need to know this to
184     * know whether the pointer (*) is in A or B if they're different. It
185     * seems like there should be some bit somewhere describing this, but
186     * after lots of experimentation I've come up empty-handed. For now we
187     * assume that if the pointer is in the first half, then we're in
188     * either the first or second state and use B, and otherwise we're in
189     * the second or third state and use A. So far I haven't seen anything
190     * that violates this assumption.
191     */
192 
193    struct {
194       uint32_t unk0;
195       uint32_t padding0[7]; /* Mirrors of unk0 */
196 
197       struct {
198          uint32_t chunk : 3;
199          uint32_t first_block : 32 - 3;
200       } writer[6];
201       uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
202 
203       uint32_t unk1;
204       uint32_t padding2[7]; /* Mirrors of unk1 */
205 
206       uint32_t writer_second_block[6];
207       uint32_t padding3[2];
208 
209       uint32_t unk2[6];
210       uint32_t padding4[2];
211 
212       struct {
213          uint32_t chunk : 3;
214          uint32_t first_block : 32 - 3;
215       } reader[6];
216       uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
217 
218       uint32_t unk3;
219       uint32_t padding6[7]; /* Mirrors of unk3 */
220 
221       uint32_t reader_second_block[6];
222       uint32_t padding7[2];
223 
224       uint32_t block_count[6];
225       uint32_t padding[2];
226 
227       uint32_t unk4;
228       uint32_t padding9[7]; /* Mirrors of unk4 */
229    } data1;
230 
231    const uint32_t *data1_ptr =
232       small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
233    memcpy(&data1, data1_ptr, sizeof(data1));
234 
235    /* Based on the kernel, the first dword is the mem pool size (in
236     * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
237     */
238    const uint32_t *data2_ptr =
239       small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
240    const int data2_size = 0x60;
241 
242    /* This seems to be the size of each queue in chunks. */
243    const uint32_t *queue_sizes = &data2_ptr[0x18];
244 
245    printf("\tdata2:\n");
246    dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
247 
248    /* These seem to be some kind of counter of allocated/deallocated blocks */
249    if (verbose) {
250       printf("\tunk0: %x\n", data1.unk0);
251       printf("\tunk1: %x\n", data1.unk1);
252       printf("\tunk3: %x\n", data1.unk3);
253       printf("\tunk4: %x\n\n", data1.unk4);
254    }
255 
256    for (int queue = 0; queue < num_queues; queue++) {
257       const char *cluster_names[6] = {"FE",   "SP_VS", "PC_VS",
258                                       "GRAS", "SP_PS", "PS"};
259       printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
260 
261       if (verbose) {
262          printf("\t\twriter_first_block: 0x%x\n",
263                 data1.writer[queue].first_block);
264          printf("\t\twriter_second_block: 0x%x\n",
265                 data1.writer_second_block[queue]);
266          printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
267          printf("\t\treader_first_block: 0x%x\n",
268                 data1.reader[queue].first_block);
269          printf("\t\treader_second_block: 0x%x\n",
270                 data1.reader_second_block[queue]);
271          printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
272          printf("\t\tblock_count: %d\n", data1.block_count[queue]);
273          printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
274          printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
275       }
276 
277       uint32_t cur_chunk = data1.reader[queue].chunk;
278       uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block
279                                          : data1.reader_second_block[queue];
280       uint32_t last_chunk = data1.writer[queue].chunk;
281       uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block
282                                            : data1.writer_second_block[queue];
283 
284       if (verbose)
285          printf("\tblock %x\n", cur_block);
286       if (cur_block >= num_blocks) {
287          fprintf(stderr, "block %x too large\n", cur_block);
288          exit(1);
289       }
290       unsigned calculated_queue_size = 0;
291       while (cur_block != last_block || cur_chunk != last_chunk) {
292          calculated_queue_size++;
293          uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
294 
295          dump_mem_pool_chunk(chunk_ptr);
296 
297          printf("\t%05x: %08x %08x %08x %08x\n",
298                 4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0],
299                 chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
300 
301          cur_chunk++;
302          if (cur_chunk == 8) {
303             cur_block = next_pointers[cur_block];
304             if (verbose)
305                printf("\tblock %x\n", cur_block);
306             if (cur_block >= num_blocks) {
307                fprintf(stderr, "block %x too large\n", cur_block);
308                exit(1);
309             }
310             cur_chunk = 0;
311          }
312       }
313       if (calculated_queue_size != queue_sizes[queue]) {
314          printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n",
315                 calculated_queue_size);
316       }
317       printf("\n");
318    }
319 }
320 
321