1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <string.h>
29
30 #include "pvr_device_info.h"
31 #include "pvr_pds.h"
32 #include "pvr_rogue_pds_defs.h"
33 #include "pvr_rogue_pds_disasm.h"
34 #include "pvr_rogue_pds_encode.h"
35 #include "util/log.h"
36 #include "util/macros.h"
37
/* Translate a bank-relative register index into its encoded register number
 * by adding the lower bound of the corresponding register bank.
 */

/* 32-bit source registers: constants, temps and persistent temps. */
#define R32_C(reg) ((reg) + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)
#define R32_T(reg) ((reg) + PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER)
#define R32_P(reg) ((reg) + PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER)

/* 32-bit temp/ptemp-only (REGS32TP) registers. */
#define R32TP_T(reg) ((reg) + PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER)
#define R32TP_P(reg) ((reg) + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER)

/* 64-bit source registers: constants, temps and persistent temps. */
#define R64_C(reg) ((reg) + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)
#define R64_T(reg) ((reg) + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER)
#define R64_P(reg) ((reg) + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER)

/* 64-bit temp/ptemp-only (REGS64TP) registers. */
#define R64TP_T(reg) ((reg) + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER)
#define R64TP_P(reg) ((reg) + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER)
51
/* Index of the 32-bit PTemp that carries the base instance for draw
 * indirect.
 */
#define PVR_INDIRECT_BASE_INSTANCE_PTEMP 1U

/* How many constants are set aside for every DDMAD instruction in the PDS
 * vertex program.
 */
#define PVR_PDS_DDMAD_NUM_CONSTS 8
57
/* Pretty-printing helpers for tracing PDS program generation.
 *
 * With TRACE_PDS defined each helper logs through mesa_logd(); otherwise
 * every helper expands to nothing, so none of its arguments are evaluated.
 */
#if defined(TRACE_PDS)

#   define pvr_debug_pds_const(reg, size, annotation) \
      mesa_logd("const[%d] @ (%dbits) %s", reg, size, annotation)
#   define pvr_debug_pds_temp(reg, size, annotation) \
      mesa_logd("temp[%d] @ (%dbits) %s", reg, size, annotation)
#   define pvr_debug_pds_note(...) mesa_logd(" // " __VA_ARGS__)

/* Logs the name of 'flag' when every bit of it is set in 'flags'.
 *
 * Wrapped in do/while(0) so the macro is a single statement (safe inside an
 * unbraced if/else) and with both arguments parenthesized so compound
 * expressions are tested as a whole.
 */
#   define pvr_debug_pds_flag(flags, flag)     \
      do {                                      \
         if (((flags) & (flag)) == (flag))      \
            mesa_logd(" > " #flag);             \
      } while (0)
#   define pvr_debug(annotation) mesa_logd(annotation)

#else
#   define pvr_debug_pds_const(reg, size, annotation)
#   define pvr_debug_pds_temp(reg, size, annotation)
#   define pvr_debug_pds_note(...)
#   define pvr_debug_pds_flag(flags, flag)
#   define pvr_debug(annotation)
#endif
80
/* Cursor used while appending variable-sized entries to the const map of a
 * struct pvr_pds_info.
 */
struct pvr_pds_const_map_entry_write_state {
   /* PDS info whose entry buffer is being written. */
   const struct pvr_pds_info *PDS_info;

   /* Most recently prepared entry; the start of the entry buffer until the
    * first entry has been prepared.
    */
   struct pvr_const_map_entry *entry;

   /* Size of the entry at 'entry'; 0 before the first entry is prepared. */
   size_t size_of_last_entry_in_bytes;

   /* Number of entries prepared so far. */
   uint32_t entry_count;

   /* Combined size of all entries prepared so far. */
   size_t entries_size_in_bytes;
};
88
pvr_init_pds_const_map_entry_write_state(struct pvr_pds_info * PDS_info,struct pvr_pds_const_map_entry_write_state * entry_write_state)89 static void pvr_init_pds_const_map_entry_write_state(
90 struct pvr_pds_info *PDS_info,
91 struct pvr_pds_const_map_entry_write_state *entry_write_state)
92 {
93 entry_write_state->PDS_info = PDS_info;
94 entry_write_state->entry = PDS_info->entries;
95 entry_write_state->size_of_last_entry_in_bytes = 0;
96 entry_write_state->entry_count = 0;
97 entry_write_state->entries_size_in_bytes = 0;
98 }
99
100 /* Returns a pointer to the next struct pvr_const_map_entry. */
pvr_prepare_next_pds_const_map_entry(struct pvr_pds_const_map_entry_write_state * entry_write_state,size_t size_of_next_entry_in_bytes)101 static void *pvr_prepare_next_pds_const_map_entry(
102 struct pvr_pds_const_map_entry_write_state *entry_write_state,
103 size_t size_of_next_entry_in_bytes)
104 {
105 /* Move on to the next entry. */
106 uint8_t *next_entry = ((uint8_t *)entry_write_state->entry +
107 entry_write_state->size_of_last_entry_in_bytes);
108 entry_write_state->entry = (struct pvr_const_map_entry *)next_entry;
109
110 entry_write_state->size_of_last_entry_in_bytes = size_of_next_entry_in_bytes;
111 entry_write_state->entry_count++;
112 entry_write_state->entries_size_in_bytes += size_of_next_entry_in_bytes;
113
114 /* Check if we can write into the next entry. */
115 assert(entry_write_state->entries_size_in_bytes <=
116 entry_write_state->PDS_info->entries_size_in_bytes);
117
118 return entry_write_state->entry;
119 }
120
pvr_write_pds_const_map_entry_vertex_attribute_address(struct pvr_pds_const_map_entry_write_state * entry_write_state,const struct pvr_pds_vertex_dma * DMA,uint32_t const_val,bool use_robust_vertex_fetch)121 static void pvr_write_pds_const_map_entry_vertex_attribute_address(
122 struct pvr_pds_const_map_entry_write_state *entry_write_state,
123 const struct pvr_pds_vertex_dma *DMA,
124 uint32_t const_val,
125 bool use_robust_vertex_fetch)
126 {
127 pvr_debug_pds_note("DMA %d dwords, stride %d, offset %d, bindingIdx %d",
128 DMA->size_in_dwords,
129 DMA->stride,
130 DMA->offset,
131 DMA->binding_index);
132
133 if (use_robust_vertex_fetch) {
134 struct pvr_const_map_entry_robust_vertex_attribute_address
135 *robust_attribute_entry;
136
137 robust_attribute_entry =
138 pvr_prepare_next_pds_const_map_entry(entry_write_state,
139 sizeof(*robust_attribute_entry));
140 robust_attribute_entry->type =
141 PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS;
142 robust_attribute_entry->const_offset = const_val;
143 robust_attribute_entry->binding_index = DMA->binding_index;
144 robust_attribute_entry->component_size_in_bytes =
145 DMA->component_size_in_bytes;
146 robust_attribute_entry->offset = DMA->offset;
147 robust_attribute_entry->stride = DMA->stride;
148 robust_attribute_entry->size_in_dwords = DMA->size_in_dwords;
149 robust_attribute_entry->robustness_buffer_offset =
150 DMA->robustness_buffer_offset;
151 } else {
152 struct pvr_const_map_entry_vertex_attribute_address *attribute_entry;
153
154 attribute_entry =
155 pvr_prepare_next_pds_const_map_entry(entry_write_state,
156 sizeof(*attribute_entry));
157 attribute_entry->type =
158 PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS;
159 attribute_entry->const_offset = const_val;
160 attribute_entry->binding_index = DMA->binding_index;
161 attribute_entry->offset = DMA->offset;
162 attribute_entry->stride = DMA->stride;
163 attribute_entry->size_in_dwords = DMA->size_in_dwords;
164 }
165 }
166
pvr_pds_encode_doutu(uint32_t cc,uint32_t end,uint32_t src0)167 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
168 uint32_t end,
169 uint32_t src0)
170 {
171 return pvr_pds_inst_encode_dout(cc,
172 end,
173 0,
174 src0,
175 PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
176 }
177
178 static uint32_t
pvr_encode_burst(struct pvr_pds_const_map_entry_write_state * entry_write_state,bool last_DMA,bool halt,unsigned int const32,unsigned int const64,unsigned int dma_size_in_dwords,unsigned int destination,unsigned int store)179 pvr_encode_burst(struct pvr_pds_const_map_entry_write_state *entry_write_state,
180 bool last_DMA,
181 bool halt,
182 unsigned int const32,
183 unsigned int const64,
184 unsigned int dma_size_in_dwords,
185 unsigned int destination,
186 unsigned int store)
187 {
188 uint32_t literal_value;
189
190 /* Encode literal value. */
191 literal_value = dma_size_in_dwords
192 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
193 literal_value |= destination
194 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
195 literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
196 store;
197
198 if (last_DMA)
199 literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
200
201 /* Create const map entry. */
202 struct pvr_const_map_entry_literal32 *literal_entry;
203
204 literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
205 sizeof(*literal_entry));
206 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
207 literal_entry->const_offset = const32;
208 literal_entry->literal_value = literal_value;
209
210 /* Encode DOUTD */
211 return pvr_pds_inst_encode_dout(0,
212 halt,
213 R32_C(const32),
214 R64_C(const64),
215 PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
216 }
217
/* pvr_encode_burst() with the destination store fixed to the common store. */
#define pvr_encode_burst_cs(write_state,                                 \
                            is_last_dma,                                 \
                            do_halt,                                     \
                            ctrl_const32,                                \
                            addr_const64,                                \
                            size_in_dwords,                              \
                            dest_offset)                                 \
   pvr_encode_burst(                                                     \
      write_state,                                                       \
      is_last_dma,                                                       \
      do_halt,                                                           \
      ctrl_const32,                                                      \
      addr_const64,                                                      \
      size_in_dwords,                                                    \
      dest_offset,                                                       \
      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE)
234
pvr_encode_direct_write(struct pvr_pds_const_map_entry_write_state * entry_write_state,bool last_DMA,bool halt,unsigned int const32,unsigned int const64,uint32_t data_mask,unsigned int destination,uint32_t destination_store,const struct pvr_device_info * dev_info)235 static uint32_t pvr_encode_direct_write(
236 struct pvr_pds_const_map_entry_write_state *entry_write_state,
237 bool last_DMA,
238 bool halt,
239 unsigned int const32,
240 unsigned int const64,
241 uint32_t data_mask,
242 unsigned int destination,
243 uint32_t destination_store,
244 const struct pvr_device_info *dev_info)
245 {
246 struct pvr_const_map_entry_literal32 *literal_entry;
247
248 uint32_t instruction =
249 pvr_pds_inst_encode_dout(0,
250 halt,
251 const32,
252 const64,
253 PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
254
255 literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
256 sizeof(*literal_entry));
257 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
258 literal_entry->const_offset = const32;
259 literal_entry->literal_value = destination_store;
260
261 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
262 literal_entry->literal_value |=
263 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED;
264 }
265
266 literal_entry->literal_value |=
267 destination << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
268
269 if (data_mask == 0x1) {
270 literal_entry->literal_value |=
271 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER;
272 } else if (data_mask == 0x2) {
273 literal_entry->literal_value |=
274 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER;
275 } else {
276 literal_entry->literal_value |=
277 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64;
278 }
279
280 if (last_DMA) {
281 literal_entry->literal_value |=
282 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
283 }
284
285 return instruction;
286 }
287
/* Constant and Temporary register allocation
 * - reserve space for a 32-bit register or a 64-bit register
 * - returned indices are offsets to 32-bit register locations
 * - 64-bit registers need to be aligned to even indices.
 */
#define RESERVE_32BIT 1U
#define RESERVE_64BIT 2U

/* Allocation wrappers. The annotation string is only forwarded in DEBUG
 * builds; otherwise NULL is passed in its place.
 *
 * Neither variant ends in a semicolon, so both expand to a plain expression
 * and behave the same wherever they are used.
 */
#if defined(DEBUG)
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, name)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, name)
#else
#   define pvr_find_constant(usage, words, name) \
      pvr_find_constant2(usage, words, NULL)
#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, NULL)
#endif
305
306 static uint32_t
pvr_find_constant2(uint8_t * const_usage,uint8_t words,const char * const_name)307 pvr_find_constant2(uint8_t *const_usage, uint8_t words, const char *const_name)
308 {
309 uint32_t const_index = ~0U;
310 uint32_t step = words;
311 uint8_t mask = (1 << words) - 1;
312
313 assert(words == 1 || words == 2);
314
315 /* Find a register at 'step' alignment that satisfies the mask. */
316 for (uint32_t i = 0; i < PVR_MAX_VERTEX_ATTRIB_DMAS; i++) {
317 for (uint32_t b = 0; b < PVR_PDS_DDMAD_NUM_CONSTS; b += step) {
318 if ((const_usage[i] & (mask << b)) != 0)
319 continue;
320 const_usage[i] |= (mask << b);
321 const_index = i * 8 + b;
322 pvr_debug_pds_const(const_index, words * 32, const_name);
323 return const_index;
324 }
325 }
326
327 unreachable("Unexpected: Space cannot be found for constant");
328 return ~0U;
329 }
330
/* Number of 32-bit temps the allocator tracks; each one has a bit in
 * pvr_temp_usage::temp_usage.
 */
#define PVR_MAX_PDS_TEMPS 32

/* Book-keeping for temp register allocation. */
struct pvr_temp_usage {
   /* Bitmask of allocated temps, bit N set means temp N is in use. */
   uint32_t temp_usage;
   /* Running total of 32-bit temps handed out. */
   uint8_t temp_used;
   /* Index of the highest allocated temp plus one. */
   uint8_t temps_needed;
};

/* Sentinel for "no temp allocated".
 *
 * UINT8_MAX rather than UINT8_C(~0): the UINT8_C argument must be a plain
 * integer constant, and with common libc definitions `UINT8_C(~0)` is the
 * int value -1, which never compares equal to a uint8_t holding the sentinel.
 * UINT8_MAX round-trips through both uint8_t and uint32_t variables.
 */
#define PVR_INVALID_TEMP UINT8_MAX
339
pvr_get_temps2(struct pvr_temp_usage * temps,uint8_t temps_needed,const char * temp_name)340 static uint8_t pvr_get_temps2(struct pvr_temp_usage *temps,
341 uint8_t temps_needed,
342 const char *temp_name)
343 {
344 uint8_t step = temps_needed;
345 uint8_t mask = (1 << temps_needed) - 1;
346
347 assert(temps_needed == 1 || temps_needed == 2);
348 assert(temps->temp_used + temps_needed <= PVR_MAX_PDS_TEMPS);
349
350 for (uint8_t i = 0; i < PVR_MAX_PDS_TEMPS; i += step) {
351 if ((temps->temp_usage & (mask << i)) != 0)
352 continue;
353
354 const size_t clzBits = 8 * sizeof(unsigned int);
355
356 temps->temp_usage |= (mask << i);
357 temps->temp_used += temps_needed;
358 temps->temps_needed =
359 clzBits - __builtin_clz((unsigned int)temps->temp_usage);
360
361 pvr_debug_pds_temp(i, temps_needed * 32, temp_name);
362
363 return i;
364 }
365
366 unreachable("Unexpected: Space cannot be found for temps");
367 return PVR_INVALID_TEMP;
368 }
369
/**
 * Wrapper macro to add a toggle for "data mode", allowing us to calculate the
 * size of a PDS program without actually attempting to store it.
 *
 * The macro expands to a single statement (do/while(0)), so it can be used
 * in an unbraced if/else. \p statement is evaluated exactly once when
 * \p dest is non-NULL and not at all in count mode; this matters because
 * several encoders passed as \p statement also append const map entries.
 *
 * \param dest The array/memory pointer where the PDS program should be stored.
 *             If it is NULL, automatically switch to count mode instead of
 *             attempting to fill in unallocated memory.
 * \param counter The local counter that holds the total instruction count;
 *                incremented by one in both modes.
 * \param statement Expression yielding the 32-bit instruction word stored at
 *                  dest[counter] when \p dest is non-NULL.
 */

#define PVR_PDS_MODE_TOGGLE(dest, counter, statement)               \
   do {                                                             \
      if (!(dest)) {                                                \
         (counter)++;                                               \
      } else {                                                      \
         const uint32_t pvr_pds_mode_toggle_inst_ = (statement);    \
         (dest)[(counter)++] = pvr_pds_mode_toggle_inst_;           \
         PVR_PDS_PRINT_INST(pvr_pds_mode_toggle_inst_);             \
      }                                                             \
   } while (0)
389
390 /**
391 * Generates the PDS vertex primary program for the dma's listed in the input
392 * structure. Produces the constant map for the Vulkan driver based upon the
393 * requirements of the instructions added to the program.
394 *
395 * PDS Data Layout
396 * ---------------
397 *
398 * The PDS data is optimized for the DDMAD layout, with the data for those
399 * instructions laid out first. The data required for other instructions is laid
400 * out in the entries unused by the DDMADs.
401 *
402 * DDMAD layout
403 * \verbatim
404 * bank | index | usage
405 * 0 | 0:1 | temps (current index)[-]
406 * 2 | 2:3 | stride[32]
407 * 1 | 4:5 | base address[64]
408 * 3 | 6:7 | ctrl[64]
409 * \endverbatim
410 *
411 * Each DMA whose stride > 0 requires one entry, laid out as above. We stride
412 * over the banks to ensure that each ddmad reads each of its operands from a
413 * different bank (i.e. remove bank clashes)
414 *
415 * Note: This is "wasting" const[0:1] and const[2], however these free
416 * registers will be used by other, non-ddmad instructions.
417 *
 * The const register usage is maintained in the const_usage array; the
 * DDMAD instructions, for example, will utilize the top 5 registers in each
 * block of 8, hence a 'usage mask' of 0xF8 (0b11111000).
421 *
422 * Constant Map
423 * ------------
424 *
425 * The constant map is built up as we add PDS instructions and passed back
426 * for the driver to fill in the PDS data section with the correct parameters
427 * for each draw call.
428 *
429 * \param input_program PDS Program description.
430 * \param code Buffer to be filled in with the PDS program. If NULL is provided,
431 * automatically switch to count-mode, preventing writes to
432 * unallocated memory.
433 * \param info PDS info structure filled in for the driver, contains the
434 * constant map.
435 * \param use_robust_vertex_fetch Do vertex fetches apply range checking.
436 * \param dev_info pvr device information struct.
437 */
pvr_pds_generate_vertex_primary_program(struct pvr_pds_vertex_primary_program_input * input_program,uint32_t * code,struct pvr_pds_info * info,bool use_robust_vertex_fetch,const struct pvr_device_info * dev_info)438 void pvr_pds_generate_vertex_primary_program(
439 struct pvr_pds_vertex_primary_program_input *input_program,
440 uint32_t *code,
441 struct pvr_pds_info *info,
442 bool use_robust_vertex_fetch,
443 const struct pvr_device_info *dev_info)
444 {
445 struct pvr_pds_const_map_entry_write_state entry_write_state;
446 struct pvr_const_map_entry_doutu_address *doutu_address_entry;
447
448 uint32_t instruction = 0; /* index into code */
449 uint32_t index; /* index used for current attribute, either vertex or
450 * instance.
451 */
452
453 uint32_t total_dma_count = 0;
454 uint32_t running_dma_count = 0;
455
456 uint32_t write_instance_control = ~0;
457 uint32_t write_vertex_control = ~0;
458 uint32_t write_base_instance_control = ~0;
459 uint32_t write_base_vertex_control = ~0;
460 uint32_t pvr_write_draw_index_control = ~0;
461
462 uint32_t ddmad_count = 0;
463 uint32_t doutw_count = 0;
464
465 uint32_t base_instance = 0;
466 uint32_t base_vertex = 0;
467 uint32_t draw_index = 0;
468
469 uint8_t const_usage[PVR_MAX_VERTEX_ATTRIB_DMAS] = { 0 };
470
471 struct pvr_temp_usage temp_usage = { 0 };
472
473 uint32_t zero_temp = PVR_INVALID_TEMP;
474
475 uint32_t max_index_temp = PVR_INVALID_TEMP;
476 uint32_t current_index_temp = PVR_INVALID_TEMP;
477
478 uint32_t index_id_temp = PVR_INVALID_TEMP;
479 uint32_t base_instance_ID_temp = PVR_INVALID_TEMP;
480 uint32_t instance_ID_temp = PVR_INVALID_TEMP;
481
482 /* Debug tracing of program flags. */
483 pvr_debug("pvr_pds_generate_vertex_primary_program");
484 pvr_debug("=================================================");
485 pvr_debug_pds_flag(input_program->flags,
486 PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED);
487 pvr_debug_pds_flag(input_program->flags,
488 PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED);
489 pvr_debug_pds_flag(input_program->flags,
490 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT);
491 pvr_debug_pds_flag(input_program->flags,
492 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT);
493 pvr_debug_pds_flag(input_program->flags,
494 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED);
495 pvr_debug_pds_flag(input_program->flags,
496 PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
497 pvr_debug_pds_flag(input_program->flags,
498 PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
499 pvr_debug(" ");
500
501 pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
502
503 /* At a minimum we need 2 dwords for the DOUTU, but since we allocate in
504 * blocks of 4 we can reserve dwords for the instance/vertex DOUTW.
505 */
506 info->data_size_in_dwords = 4;
507
508 /* Reserve 2 temps - these are automatically filled in by the VDM
509 *
510 * For instanced draw calls we manually increment the instance id by the
511 * base-instance offset which is either provided as a constant, or in a
512 * ptemp (for draw indirect)
513 *
514 * temp - contents
515 * ---------------
516 * 0 - index id (pre-filled)
517 * 1 - base instance + instance id
518 */
519 index_id_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Index id");
520 instance_ID_temp =
521 pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Instance id");
522
523 /* Reserve the lowest 2 dwords for DOUTU.
524 * [------XX]
525 */
526 const_usage[0] = 0x03;
527
528 /* Reserve consts for all the DDMAD's. */
529 for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
530 /* Mark the consts required by this ddmad "in-use".
531 * [XXXXX---]
532 */
533 const_usage[ddmad_count++] |= 0xf8;
534 }
535
536 /* Start off by assuming we can fit everything in the 8 dwords/ddmad
537 * footprint, if any DOUTD/DOUTW falls outside we will increase this
538 * counter.
539 */
540 if (ddmad_count)
541 info->data_size_in_dwords = PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count;
542
543 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
544 doutw_count++;
545 write_vertex_control =
546 pvr_find_constant(const_usage, RESERVE_32BIT, "Vertex id DOUTW Ctrl");
547 }
548
549 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
550 doutw_count++;
551 write_instance_control = pvr_find_constant(const_usage,
552 RESERVE_32BIT,
553 "Instance id DOUTW Ctrl");
554 }
555
556 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
557 doutw_count++;
558 write_base_instance_control =
559 pvr_find_constant(const_usage,
560 RESERVE_32BIT,
561 "Base Instance DOUTW Ctrl");
562 }
563
564 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
565 doutw_count++;
566 write_base_vertex_control = pvr_find_constant(const_usage,
567 RESERVE_32BIT,
568 "Base Vertex DOUTW Ctrl");
569
570 /* Load base vertex from constant for non-indirect variants. */
571 if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
572 0) {
573 struct pvr_const_map_entry_base_vertex *psBaseVertexEntry =
574 (struct pvr_const_map_entry_base_vertex *)entry_write_state.entry;
575
576 base_vertex =
577 pvr_find_constant(const_usage, RESERVE_32BIT, "base_vertex");
578
579 psBaseVertexEntry =
580 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
581 sizeof(*psBaseVertexEntry));
582 psBaseVertexEntry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX;
583 psBaseVertexEntry->const_offset = base_vertex;
584 }
585 }
586
587 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
588 doutw_count++;
589 pvr_write_draw_index_control =
590 pvr_find_constant(const_usage, RESERVE_32BIT, "Draw Index DOUTW Ctrl");
591
592 /* Set draw index to 0 for non-indirect variants. */
593 if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
594 0) {
595 struct pvr_const_map_entry_literal32 *literal_entry;
596
597 draw_index =
598 pvr_find_constant(const_usage, RESERVE_32BIT, "draw_index");
599
600 literal_entry =
601 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
602 sizeof(*literal_entry));
603 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
604 literal_entry->const_offset = draw_index;
605 literal_entry->literal_value = 0;
606 }
607 }
608
609 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
610 /* Load absolute instance id into uiInstanceIdTemp. */
611 PVR_PDS_MODE_TOGGLE(
612 code,
613 instruction,
614 pvr_pds_inst_encode_add32(
615 /* cc */ 0,
616 /* alum */ 0,
617 /* sna */ 0,
618 /* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
619 /* src1 */ R32_T(instance_ID_temp),
620 /* dst */ R32TP_T(instance_ID_temp)));
621 } else if (input_program->flags &
622 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
623 struct pvr_const_map_entry_base_instance *base_instance_entry =
624 (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
625
626 base_instance =
627 pvr_find_constant(const_usage, RESERVE_32BIT, "base_instance");
628
629 PVR_PDS_MODE_TOGGLE(code,
630 instruction,
631 pvr_pds_inst_encode_add32(
632 /* cc */ 0,
633 /* alum */ 0,
634 /* sna */ 0,
635 /* src0 */ R32_C(base_instance),
636 /* src1 */ R32_T(instance_ID_temp),
637 /* dst */ R32TP_T(instance_ID_temp)));
638
639 base_instance_entry =
640 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
641 sizeof(*base_instance_entry));
642 base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
643 base_instance_entry->const_offset = base_instance;
644 } else if (input_program->flags &
645 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
646 struct pvr_const_map_entry_base_instance *base_instance_entry =
647 (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
648
649 base_instance = pvr_find_constant(const_usage,
650 RESERVE_32BIT,
651 "base_instance (Driver Const)");
652
653 /* Base instance provided by the driver. */
654 base_instance_entry =
655 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
656 sizeof(*base_instance_entry));
657 base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
658 base_instance_entry->const_offset = base_instance;
659 }
660
661 total_dma_count = ddmad_count;
662
663 total_dma_count += doutw_count;
664
665 if (use_robust_vertex_fetch) {
666 pvr_debug_pds_note("RobustBufferVertexFetch Initialization");
667
668 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
669 zero_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "zero_temp");
670
671 /* Load 0 into instance_ID_temp. */
672 PVR_PDS_MODE_TOGGLE(code,
673 instruction,
674 pvr_pds_inst_encode_limm(0, /* cc */
675 zero_temp, /* SRC1 */
676 0, /* SRC0 */
677 0 /* GR */
678 ));
679 } else {
680 zero_temp = pvr_get_temps(&temp_usage, RESERVE_64BIT, "zero_temp");
681
682 max_index_temp =
683 pvr_get_temps(&temp_usage, RESERVE_64BIT, "uMaxIndex");
684 current_index_temp =
685 pvr_get_temps(&temp_usage, RESERVE_64BIT, "uCurrentIndex");
686
687 PVR_PDS_MODE_TOGGLE(code,
688 instruction,
689 pvr_pds_inst_encode_stflp64(
690 0, /* cc */
691 PVR_ROGUE_PDSINST_LOP_XOR, /* LOP */
692 1, /* IM */
693 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
694 */
695 R64TP_T(zero_temp >> 1), /* SRC1 (REGS64TP)
696 */
697 0, /* SRC2 (REGS32) */
698 R64TP_T(zero_temp >> 1) /* DST (REG64TP) */
699 ));
700 PVR_PDS_MODE_TOGGLE(code,
701 instruction,
702 pvr_pds_inst_encode_stflp64(
703 0, /* cc */
704 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
705 1, /* IM */
706 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
707 */
708 0, /* SRC1 (REGS64TP) */
709 0, /* SRC2 (REGS32) */
710 R64TP_T(current_index_temp >> 1) /* DST */
711 /* (REG64TP) */
712 ));
713 PVR_PDS_MODE_TOGGLE(code,
714 instruction,
715 pvr_pds_inst_encode_stflp64(
716 0, /* cc */
717 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
718 1, /* IM */
719 R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
720 */
721 0, /* SRC1 (REGS64TP) */
722 0, /* SRC2 (REGS32) */
723 R64TP_T(max_index_temp >> 1) /* DST */
724 /* (REG64TP) */
725 ));
726 }
727 }
728
729 if (input_program->dma_count && use_robust_vertex_fetch) {
730 PVR_PDS_MODE_TOGGLE(
731 code,
732 instruction,
733 pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCC */
734 0, /* Neg */
735 PVR_HAS_FEATURE(dev_info, pds_ddmadt)
736 ? PVR_ROGUE_PDSINST_PREDICATE_OOB
737 : PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETC */
738 1 /* Addr */
739 ));
740 }
741
742 for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
743 uint32_t const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS;
744 uint32_t control_word;
745 struct pvr_const_map_entry_literal32 *literal_entry;
746
747 struct pvr_pds_vertex_dma *vertex_dma = &input_program->dma_list[dma];
748 bool last_DMA = (++running_dma_count == total_dma_count);
749
750 pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_DMA);
751
752 /* The id we use to index into this dma. */
753 if (vertex_dma->flags & PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE) {
754 pvr_debug_pds_note("Instance Rate (divisor = %d)",
755 vertex_dma->divisor);
756
757 /* 4 - madd 0 - needs to be 64-bit aligned
758 * 5 - madd 1
759 */
760 if (vertex_dma->divisor > 1) {
761 const uint32_t adjusted_instance_ID_temp =
762 pvr_get_temps(&temp_usage,
763 RESERVE_64BIT,
764 "adjusted_instance_ID_temp");
765 const uint32_t MADD_temp =
766 pvr_get_temps(&temp_usage, RESERVE_64BIT, "MADD_temp");
767
768 /* 1. Remove base instance value from temp 1 to get instance id
769 * 2. Divide the instance id by the divisor - Iout = (Iin *
770 * Multiplier) >> (shift+31)
771 * 3. Add the base instance back on.
772 *
773 * Need two zero temps for the add part of the later MAD.
774 */
775
776 PVR_PDS_MODE_TOGGLE(code,
777 instruction,
778 pvr_pds_inst_encode_add64(
779 /* cc */ 0,
780 /* alum */ 0,
781 /* sna */ 1,
782 /* src0 */ R64_T(MADD_temp >> 1),
783 /* src1 */ R64_T(MADD_temp >> 1),
784 /* dst */ R64TP_T(MADD_temp >> 1)));
785
786 if (input_program->flags &
787 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
788 /* Subtract base instance from temp 1, put into
789 * adjusted_instance_ID_temp.
790 */
791 PVR_PDS_MODE_TOGGLE(
792 code,
793 instruction,
794 pvr_pds_inst_encode_add32(
795 /* cc */ 0,
796 /* alum */ 0,
797 /* sna */ 1,
798 /* src0 */ R32_T(instance_ID_temp),
799 /* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
800 /* dst */ R32TP_T(adjusted_instance_ID_temp)));
801 } else if (input_program->flags &
802 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
803 /* Subtract base instance from temp 1, put into
804 * adjusted_instance_ID_temp.
805 */
806 PVR_PDS_MODE_TOGGLE(
807 code,
808 instruction,
809 pvr_pds_inst_encode_add32(
810 /* cc */ 0,
811 /* alum */ 0,
812 /* sna */ 1,
813 /* src0 */ R32_T(instance_ID_temp),
814 /* src1 */ R32_C(base_instance),
815 /* dst */ R32TP_T(adjusted_instance_ID_temp)));
816 } else {
817 /* Copy instance from temp 1 to adjusted_instance_ID_temp.
818 */
819 PVR_PDS_MODE_TOGGLE(
820 code,
821 instruction,
822 pvr_pds_inst_encode_add32(
823 /* cc */ 0,
824 /* alum */ 0,
825 /* sna */ 0,
826 /* src0 */ R32_T(instance_ID_temp),
827 /* src1 */ R32_T(MADD_temp), /* MADD_temp is set
828 * to 0 at this point.
829 */
830 /* dst */ R32TP_T(adjusted_instance_ID_temp)));
831 }
832
833 /* shift = the bit of the next highest power of two. */
834 uint32_t shift_unsigned =
835 (31 - __builtin_clz(vertex_dma->divisor - 1)) + 1;
836 int32_t shift = (int32_t)shift_unsigned;
837 uint32_t shift_2s_comp;
838
839 pvr_debug_pds_note(
840 "Perform instance rate divide (as integer multiply and rshift)");
841
842 const uint32_t multipier_constant =
843 pvr_find_constant(const_usage,
844 RESERVE_32BIT,
845 "MultiplierConstant (for InstanceDivisor)");
846
847 /* multiplier = ( 2^(shift + 31) + (divisor - 1) ) / divisor,
848 note: the division above is integer division. */
849 uint64_t multipier64 =
850 (uint64_t)((((uint64_t)1 << ((uint64_t)shift_unsigned + 31)) +
851 ((uint64_t)vertex_dma->divisor - (uint64_t)1)) /
852 (uint64_t)vertex_dma->divisor);
853 uint32_t multiplier = (uint32_t)multipier64;
854
855 pvr_debug_pds_note(" - Value of MultiplierConstant = %u",
856 multiplier);
857 pvr_debug_pds_note(" - Value of Shift = %d", shift);
858
859 literal_entry =
860 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
861 sizeof(*literal_entry));
862 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
863 literal_entry->const_offset = multipier_constant;
864 literal_entry->literal_value = multiplier;
865
866 /* (Iin * Multiplier) */
867 PVR_PDS_MODE_TOGGLE(
868 code,
869 instruction,
870 pvr_rogue_inst_encode_mad(0, /* Sign of add is positive */
871 0, /* Unsigned ALU mode */
872 0, /* Unconditional */
873 R32_C(multipier_constant),
874 R32_T(adjusted_instance_ID_temp),
875 R64_T(MADD_temp / 2),
876 R64TP_T(MADD_temp / 2)));
877
878 /* >> (shift + 31) */
879 shift += 31;
880 shift *= -1;
881
882 if (shift < -31) {
883 /* >> (31) */
884 shift_2s_comp = 0xFFFE1;
885 PVR_PDS_MODE_TOGGLE(code,
886 instruction,
887 pvr_pds_inst_encode_stflp64(
888 /* cc */ 0,
889 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
890 /* IM */ 1, /* enable immediate */
891 /* SRC0 */ R64_T(MADD_temp / 2),
892 /* SRC1 */ 0, /* This won't be used
893 in a shift
894 operation. */
895 /* SRC2 (Shift) */ shift_2s_comp,
896 /* DST */ R64TP_T(MADD_temp / 2)));
897 shift += 31;
898 }
899
900 /* >> (shift + 31) */
901 shift_2s_comp = *((uint32_t *)&shift);
902 PVR_PDS_MODE_TOGGLE(code,
903 instruction,
904 pvr_pds_inst_encode_stflp64(
905 /* cc */ 0,
906 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
907 /* IM */ 1, /* enable immediate */
908 /* SRC0 */ R64_T(MADD_temp / 2),
909 /* SRC1 */ 0, /* This won't be used
910 * in a shift
911 * operation. */
912 /* SRC2 (Shift) */ shift_2s_comp,
913 /* DST */ R64TP_T(MADD_temp / 2)));
914
915 if (input_program->flags &
916 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
917 /* Add base instance. */
918 PVR_PDS_MODE_TOGGLE(
919 code,
920 instruction,
921 pvr_pds_inst_encode_add32(
922 /* cc */ 0,
923 /* alum */ 0,
924 /* sna */ 0,
925 /* src0 */ R32_T(MADD_temp),
926 /* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
927 /* dst */ R32TP_T(MADD_temp)));
928 } else if (input_program->flags &
929 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
930 /* Add base instance. */
931 PVR_PDS_MODE_TOGGLE(code,
932 instruction,
933 pvr_pds_inst_encode_add32(
934 /* cc */ 0,
935 /* alum */ 0,
936 /* sna */ 0,
937 /* src0 */ R32_T(MADD_temp),
938 /* src1 */ R32_C(base_instance),
939 /* dst */ R32TP_T(MADD_temp)));
940 }
941
942 pvr_debug_pds_note(
943 "DMA Vertex Index will be sourced from 'MADD_temp'");
944 index = MADD_temp;
945 } else if (vertex_dma->divisor == 0) {
946 if (base_instance_ID_temp == PVR_INVALID_TEMP) {
947 base_instance_ID_temp = pvr_get_temps(&temp_usage,
948 RESERVE_32BIT,
949 "uBaseInstanceIDTemp");
950 }
951
952 /* Load 0 into instance_ID_temp. */
953 PVR_PDS_MODE_TOGGLE(code,
954 instruction,
955 pvr_pds_inst_encode_limm(
956 /* cc */ 0,
957 /* src1 */ base_instance_ID_temp,
958 /* src0 */ 0,
959 /* gr */ 0));
960
961 if (input_program->flags &
962 PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
963 /* Add base instance. */
964 PVR_PDS_MODE_TOGGLE(
965 code,
966 instruction,
967 pvr_pds_inst_encode_add32(
968 /* cc */ 0,
969 /* alum */ 0,
970 /* sna */ 0,
971 /* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
972 /* src1 */ R32_T(base_instance_ID_temp),
973 /* dst */ R32TP_T(base_instance_ID_temp)));
974
975 } else if (input_program->flags &
976 PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
977 /* Add base instance. */
978 PVR_PDS_MODE_TOGGLE(
979 code,
980 instruction,
981 pvr_pds_inst_encode_add32(
982 /* cc */ 0,
983 /* alum */ 0,
984 /* sna */ 0,
985 /* src0 */ R32_C(base_instance),
986 /* src1 */ R32_T(base_instance_ID_temp),
987 /* dst */ R32TP_T(base_instance_ID_temp)));
988 }
989
990 pvr_debug_pds_note(
991 "DMA Vertex Index will be sourced from 'uBaseInstanceIdTemp'");
992 index = base_instance_ID_temp;
993 } else {
994 pvr_debug_pds_note(
995 "DMA Vertex Index will be sourced from 'uInstanceIdTemp'");
996 index = instance_ID_temp;
997 }
998 } else {
999 pvr_debug_pds_note(
1000 "DMA Vertex Index will be sourced from 'uIndexIdTemp'");
1001 index = index_id_temp;
1002 }
1003
1004 /* DDMAD Const Usage [__XX_---] */
1005 pvr_write_pds_const_map_entry_vertex_attribute_address(
1006 &entry_write_state,
1007 vertex_dma,
1008 const_base + 4,
1009 use_robust_vertex_fetch);
1010
1011 /* DDMAD Const Usage [__XXX---] */
1012 literal_entry =
1013 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1014 sizeof(*literal_entry));
1015 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1016 literal_entry->const_offset = const_base + 3;
1017 literal_entry->literal_value = vertex_dma->stride;
1018
1019 control_word = vertex_dma->size_in_dwords
1020 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1021 control_word |= vertex_dma->destination
1022 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1023 control_word |= (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1024 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1025
1026 /* DDMADT instructions will do a dummy doutd when OOB if
1027 * PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN is set but as the driver
1028 * would need to do another doutd after an OOB DDMADT to provide the 'in
1029 * bounds' data the DDMADT can't be set as LAST.
1030 *
1031 * This requires us to include a final dummy DDMAD.LAST instruction.
1032 *
1033 * Pseudocode taken from SeriesXE2017.PDS Instruction Controller
1034 * Specification.doc
1035 *
1036 * DDMAD src0,src1,src2,src3
1037 *
1038 * calculated_source_address := src0*src1+src2
1039 * base_address := src2
1040 * dma_parameters := src3[31:0]
1041 * buffer_size := src3[63:33]
1042 * test := src3[32]
1043 *
1044 * if (test == 1) {
1045 * // DDMAD(T)
1046 * if (calculated_source_address[39:0] + (burst_size<<2) <=
1047 * base_address[39:0] + buffer_size) {
1048 * OOB := 0
1049 * DOUTD calculated_source_address,dma_paramters
1050 * } else {
1051 * OOB := 1
1052 * if (last_instance == 1) {
1053 * dma_parameters[BURST_SIZE] := 0
1054 * DOUTD calculated_source_address,dma_paramters
1055 * }
1056 * }
1057 * } else {
1058 * // DDMAD
1059 * DOUTD calculated_source_address,dma_paramters
1060 * }
1061 */
1062
1063 if (last_DMA && (!PVR_HAS_FEATURE(dev_info, pds_ddmadt) ||
1064 !use_robust_vertex_fetch)) {
1065 pvr_debug_pds_note("LAST DDMAD");
1066 control_word |= PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1067 }
1068
1069 /* DDMAD Const Usage [_XXXX---] */
1070 literal_entry =
1071 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1072 sizeof(*literal_entry));
1073 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1074 literal_entry->const_offset = (const_base + 6);
1075 literal_entry->literal_value = control_word;
1076
1077 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1078 /* DDMAD Const Usage [XXXXX---]
1079 * With DDMADT an extra 32bits of SRC3 contains the information for
1080 * performing out-of-bounds tests on the DMA.
1081 */
1082
1083 if (use_robust_vertex_fetch) {
1084 struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size
1085 *obb_buffer_size;
1086 obb_buffer_size =
1087 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1088 sizeof(*obb_buffer_size));
1089
1090 obb_buffer_size->type =
1091 PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE;
1092 obb_buffer_size->const_offset = const_base + 7;
1093 obb_buffer_size->binding_index = vertex_dma->binding_index;
1094 } else {
1095 literal_entry =
1096 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1097 sizeof(*literal_entry));
1098 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1099 literal_entry->const_offset = const_base + 7;
1100 literal_entry->literal_value = 0;
1101 }
1102
1103 PVR_PDS_MODE_TOGGLE(
1104 code,
1105 instruction,
1106 pvr_pds_inst_encode_ddmad(0, /* cc */
1107 0, /* END */
1108 R32_C(const_base + 3), /* SRC0 (REGS32) */
1109 index, /* SRC1 (REGS32T) */
1110 R64_C((const_base + 4) >> 1), /* SRC2
1111 * (REGS64)
1112 */
1113 R64_C((const_base + 6) >> 1) /* SRC3
1114 * (REGS64C)
1115 */
1116 ));
1117
1118 if (use_robust_vertex_fetch) {
1119 /* If not out of bounds, skip next DDMAD instructions. */
1120 PVR_PDS_MODE_TOGGLE(code,
1121 instruction,
1122 pvr_pds_inst_encode_ddmad(
1123 1, /* cc */
1124 0, /* END */
1125 R32_C(const_base + 3), /* SRC0 (REGS32) */
1126 R32_T(zero_temp), /* SRC1 (REGS32T) */
1127 R64_C((const_base + 4) >> 1), /* SRC2
1128 * (REGS64)
1129 */
1130 R64_C((const_base + 6) >> 1) /* SRC3
1131 * (REGS64C)
1132 */
1133 ));
1134
1135 /* Now the driver must have a dummy DDMAD marked as last. */
1136 if (last_DMA) {
1137 uint32_t dummy_dma_const = pvr_find_constant(const_usage,
1138 RESERVE_64BIT,
1139 "uDummyDMAConst");
1140 uint32_t zero_const =
1141 pvr_find_constant(const_usage, RESERVE_64BIT, "uZeroConst");
1142
1143 literal_entry =
1144 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1145 sizeof(*literal_entry));
1146 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1147 literal_entry->const_offset = zero_const;
1148 literal_entry->literal_value = 0;
1149
1150 literal_entry =
1151 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1152 sizeof(*literal_entry));
1153 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1154 literal_entry->const_offset = zero_const + 1;
1155 literal_entry->literal_value = 0;
1156
1157 literal_entry =
1158 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1159 sizeof(*literal_entry));
1160 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1161 literal_entry->const_offset = dummy_dma_const;
1162 literal_entry->literal_value = 0;
1163
1164 literal_entry->literal_value |=
1165 0 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1166 literal_entry->literal_value |=
1167 (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1168 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
1169 literal_entry->literal_value |=
1170 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1171
1172 literal_entry =
1173 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1174 sizeof(*literal_entry));
1175 literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
1176 literal_entry->const_offset = dummy_dma_const + 1;
1177 literal_entry->literal_value = 0;
1178
1179 PVR_PDS_MODE_TOGGLE(code,
1180 instruction,
1181 pvr_pds_inst_encode_ddmad(
1182 0, /* cc */
1183 0, /* END */
1184 R32_C(zero_const), /* SRC0 (REGS32)
1185 */
1186 R32_T(zero_temp), /* SRC1 (REGS32T)
1187 */
1188 R64_C((dummy_dma_const) >> 1), /* SRC2
1189 (REGS64)
1190 */
1191 R64_C((dummy_dma_const) >> 1) /* SRC3
1192 (REGS64C)
1193 */
1194 ));
1195 }
1196 }
1197 } else {
1198 if (use_robust_vertex_fetch) {
1199 struct pvr_const_map_entry_vertex_attribute_max_index
1200 *max_index_entry;
1201
1202 pvr_debug("RobustVertexFetch DDMAD");
1203
1204 const uint32_t max_index_const =
1205 pvr_find_constant(const_usage, RESERVE_32BIT, "max_index_const");
1206
1207 max_index_entry =
1208 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1209 sizeof(*max_index_entry));
1210 max_index_entry->const_offset = max_index_const;
1211 max_index_entry->type =
1212 PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX;
1213 max_index_entry->binding_index = vertex_dma->binding_index;
1214 max_index_entry->offset = vertex_dma->offset;
1215 max_index_entry->stride = vertex_dma->stride;
1216 max_index_entry->size_in_dwords = vertex_dma->size_in_dwords;
1217 max_index_entry->component_size_in_bytes =
1218 vertex_dma->component_size_in_bytes;
1219
1220 PVR_PDS_MODE_TOGGLE(
1221 code,
1222 instruction,
1223 pvr_pds_inst_encode_add32(0, /* cc */
1224 0, /* ALUM */
1225 PVR_ROGUE_PDSINST_LOP_NONE, /* SNA */
1226 R32_C(max_index_const), /* SRC0
1227 * (REGS32)
1228 */
1229 R32_T(zero_temp), /* SRC1 (REGS32) */
1230 R32TP_T(max_index_temp) /* DST
1231 * (REG32TP)
1232 */
1233 ));
1234
1235 PVR_PDS_MODE_TOGGLE(code,
1236 instruction,
1237 pvr_pds_inst_encode_stflp32(
1238 1, /* IM */
1239 0, /* cc */
1240 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1241 index, /* SRC0 (REGS32T) */
1242 0, /* SRC1 (REGS32) */
1243 0, /* SRC2 (REG32TP) */
1244 R32TP_T(current_index_temp) /* DST
1245 * (REG32TP)
1246 */
1247 ));
1248
1249 PVR_PDS_MODE_TOGGLE(
1250 code,
1251 instruction,
1252 pvr_pds_inst_encode_cmp(
1253 0, /* cc enable */
1254 PVR_ROGUE_PDSINST_COP_GT, /* Operation */
1255 R64TP_T(current_index_temp >> 1), /* SRC
1256 * (REGS64TP)
1257 */
1258 R64_T(max_index_temp >> 1) /* SRC1 (REGS64) */
1259 ));
1260
1261 PVR_PDS_MODE_TOGGLE(code,
1262 instruction,
1263 pvr_pds_inst_encode_stflp32(
1264 1, /* IM */
1265 1, /* cc */
1266 PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
1267 zero_temp, /* SRC0 (REGS32T) */
1268 0, /* SRC1 (REGS32) */
1269 0, /* SRC2 (REG32TP) */
1270 R32TP_T(current_index_temp) /* DST
1271 * (REG32TP)
1272 */
1273 ));
1274
1275 PVR_PDS_MODE_TOGGLE(code,
1276 instruction,
1277 pvr_pds_inst_encode_ddmad(
1278 0, /* cc */
1279 0, /* END */
1280 R32_C(const_base + 3), /* SRC0 (REGS32) */
1281 current_index_temp, /* SRC1 (REGS32T) */
1282 R64_C((const_base + 4) >> 1), /* SRC2
1283 * (REGS64)
1284 */
1285 (const_base + 6) >> 1 /* SRC3 (REGS64C) */
1286 ));
1287 } else {
1288 PVR_PDS_MODE_TOGGLE(code,
1289 instruction,
1290 pvr_pds_inst_encode_ddmad(
1291 /* cc */ 0,
1292 /* end */ 0,
1293 /* src0 */ R32_C(const_base + 3),
1294 /* src2 */ (index),
1295 /* src1 */ R64_C((const_base + 4) >> 1),
1296 /* src3 */ (const_base + 6) >> 1));
1297 }
1298 }
1299 }
1300
1301 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
1302 bool last_DMA = (++running_dma_count == total_dma_count);
1303
1304 PVR_PDS_MODE_TOGGLE(
1305 code,
1306 instruction,
1307 pvr_encode_direct_write(
1308 &entry_write_state,
1309 last_DMA,
1310 false,
1311 R64_C(write_vertex_control),
1312 R64_T(0),
1313 0x1,
1314 input_program->vertex_id_register,
1315 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1316 dev_info));
1317 }
1318
1319 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
1320 bool last_DMA = (++running_dma_count == total_dma_count);
1321
1322 PVR_PDS_MODE_TOGGLE(
1323 code,
1324 instruction,
1325 pvr_encode_direct_write(
1326 &entry_write_state,
1327 last_DMA,
1328 false,
1329 R64_C(write_instance_control),
1330 R64_T(0),
1331 0x2,
1332 input_program->instance_id_register,
1333 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1334 dev_info));
1335 }
1336
1337 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
1338 bool last_DMA = (++running_dma_count == total_dma_count);
1339
1340 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1341 /* Base instance comes from ptemp 1. */
1342 PVR_PDS_MODE_TOGGLE(
1343 code,
1344 instruction,
1345 pvr_encode_direct_write(
1346 &entry_write_state,
1347 last_DMA,
1348 false,
1349 R64_C(write_base_instance_control),
1350 R64_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP >> 1),
1351 0x2,
1352 input_program->base_instance_register,
1353 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1354 dev_info));
1355 } else {
1356 uint32_t data_mask = (base_instance & 1) ? 0x2 : 0x1;
1357
1358 /* Base instance comes from driver constant. */
1359 PVR_PDS_MODE_TOGGLE(
1360 code,
1361 instruction,
1362 pvr_encode_direct_write(
1363 &entry_write_state,
1364 last_DMA,
1365 false,
1366 R64_C(write_base_instance_control),
1367 R64_C(base_instance >> 1),
1368 data_mask,
1369 input_program->base_instance_register,
1370 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1371 dev_info));
1372 }
1373 }
1374
1375 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
1376 bool last_DMA = (++running_dma_count == total_dma_count);
1377
1378 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1379 /* Base vertex comes from ptemp 0 (initialized by PDS hardware). */
1380 PVR_PDS_MODE_TOGGLE(
1381 code,
1382 instruction,
1383 pvr_encode_direct_write(
1384 &entry_write_state,
1385 last_DMA,
1386 false,
1387 R64_C(write_base_vertex_control),
1388 R64_P(0),
1389 0x1,
1390 input_program->base_vertex_register,
1391 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1392 dev_info));
1393 } else {
1394 uint32_t data_mask = (base_vertex & 1) ? 0x2 : 0x1;
1395
1396 /* Base vertex comes from driver constant (literal 0). */
1397 PVR_PDS_MODE_TOGGLE(
1398 code,
1399 instruction,
1400 pvr_encode_direct_write(
1401 &entry_write_state,
1402 last_DMA,
1403 false,
1404 R64_C(write_base_vertex_control),
1405 R64_C(base_vertex >> 1),
1406 data_mask,
1407 input_program->base_vertex_register,
1408 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1409 dev_info));
1410 }
1411 }
1412
1413 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
1414 bool last_DMA = (++running_dma_count == total_dma_count);
1415
1416 if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
1417 /* Draw index comes from ptemp 3. */
1418 PVR_PDS_MODE_TOGGLE(
1419 code,
1420 instruction,
1421 pvr_encode_direct_write(
1422 &entry_write_state,
1423 last_DMA,
1424 false,
1425 R64_C(pvr_write_draw_index_control),
1426 R64_P(1),
1427 0x2,
1428 input_program->draw_index_register,
1429 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1430 dev_info));
1431 } else {
1432 uint32_t data_mask = (draw_index & 1) ? 0x2 : 0x1;
1433
1434 /* Draw index comes from constant (literal 0). */
1435 PVR_PDS_MODE_TOGGLE(
1436 code,
1437 instruction,
1438 pvr_encode_direct_write(
1439 &entry_write_state,
1440 last_DMA,
1441 false,
1442 R64_C(pvr_write_draw_index_control),
1443 R64_C(draw_index >> 1),
1444 data_mask,
1445 input_program->draw_index_register,
1446 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1447 dev_info));
1448 }
1449 }
1450
1451 doutu_address_entry =
1452 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1453 sizeof(*doutu_address_entry));
1454 doutu_address_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1455 doutu_address_entry->const_offset = 0;
1456 doutu_address_entry->doutu_control = input_program->usc_task_control.src0;
1457
1458 if (use_robust_vertex_fetch) {
1459 /* Restore IF0 */
1460 PVR_PDS_MODE_TOGGLE(
1461 code,
1462 instruction,
1463 pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC */
1464 0, /* Neg */
1465 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC */
1466 1 /* Addr */
1467 ));
1468 }
1469
1470 PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_encode_doutu(1, 1, 0));
1471 PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_inst_encode_halt(0));
1472
1473 assert(running_dma_count == total_dma_count);
1474
1475 for (uint32_t i = 0; i < ARRAY_SIZE(const_usage); i++) {
1476 if (const_usage[i] == 0)
1477 break;
1478
1479 info->data_size_in_dwords =
1480 8 * i + (32 - __builtin_clz((uint32_t)const_usage[i]));
1481 }
1482
1483 info->temps_required = temp_usage.temps_needed;
1484 info->entry_count = entry_write_state.entry_count;
1485 info->entries_written_size_in_bytes =
1486 entry_write_state.entries_size_in_bytes;
1487 info->code_size_in_dwords = instruction;
1488
1489 pvr_debug("=================================================\n");
1490 }
1491
pvr_pds_generate_descriptor_upload_program(struct pvr_descriptor_program_input * input_program,uint32_t * code_section,struct pvr_pds_info * info)1492 void pvr_pds_generate_descriptor_upload_program(
1493 struct pvr_descriptor_program_input *input_program,
1494 uint32_t *code_section,
1495 struct pvr_pds_info *info)
1496 {
1497 unsigned int num_consts64;
1498 unsigned int num_consts32;
1499 unsigned int next_const64;
1500 unsigned int next_const32;
1501 unsigned int instruction = 0;
1502 uint32_t compile_time_buffer_index = 0;
1503
1504 unsigned int total_dma_count = 0;
1505 unsigned int running_dma_count = 0;
1506
1507 struct pvr_pds_const_map_entry_write_state entry_write_state;
1508
1509 /* Calculate the total register usage so we can stick 32-bit consts
1510 * after 64. Each DOUTD/DDMAD requires 1 32-bit constant and 1 64-bit
1511 * constant.
1512 */
1513 num_consts32 = input_program->descriptor_set_count;
1514 num_consts64 = input_program->descriptor_set_count;
1515 total_dma_count = input_program->descriptor_set_count;
1516
1517 pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
1518
1519 for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1520 struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1521
1522 /* This switch statement looks pointless but we want to optimize DMAs
1523 * that can be done as a DOUTW.
1524 */
1525 switch (buffer->type) {
1526 default: {
1527 /* 1 DOUTD per compile time buffer: */
1528 num_consts32++;
1529 num_consts64++;
1530 total_dma_count++;
1531 break;
1532 }
1533 }
1534 }
1535
1536 /* DOUTU for the secondary update program requires a 64-bit constant. */
1537 if (input_program->secondary_program_present)
1538 num_consts64++;
1539
1540 info->data_size_in_dwords = (num_consts64 * 2) + (num_consts32);
1541
1542 /* Start counting constants. */
1543 next_const64 = 0;
1544 next_const32 = num_consts64 * 2;
1545
1546 /* For each descriptor set perform a DOUTD. */
1547 for (unsigned int descriptor_index = 0;
1548 descriptor_index < input_program->descriptor_set_count;
1549 descriptor_index++) {
1550 struct pvr_const_map_entry_descriptor_set *descriptor_set_entry;
1551 struct pvr_pds_descriptor_set *descriptor_set =
1552 &input_program->descriptor_sets[descriptor_index];
1553
1554 bool last_DMA = (++running_dma_count == total_dma_count);
1555 bool halt = last_DMA && !input_program->secondary_program_present;
1556
1557 descriptor_set_entry =
1558 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1559 sizeof(*descriptor_set_entry));
1560 descriptor_set_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET;
1561 descriptor_set_entry->const_offset = next_const64 * 2;
1562 descriptor_set_entry->descriptor_set = descriptor_set->descriptor_set;
1563 descriptor_set_entry->primary = descriptor_set->primary;
1564 descriptor_set_entry->offset_in_dwords = descriptor_set->offset_in_dwords;
1565
1566 PVR_PDS_MODE_TOGGLE(code_section,
1567 instruction,
1568 pvr_encode_burst_cs(&entry_write_state,
1569 last_DMA,
1570 halt,
1571 next_const32,
1572 next_const64,
1573 descriptor_set->size_in_dwords,
1574 descriptor_set->destination));
1575
1576 next_const64++;
1577 next_const32++;
1578 }
1579
1580 for (unsigned int index = 0; index < input_program->buffer_count; index++) {
1581 struct pvr_pds_buffer *buffer = &input_program->buffers[index];
1582
1583 bool last_DMA = (++running_dma_count == total_dma_count);
1584 bool halt = last_DMA && !input_program->secondary_program_present;
1585
1586 switch (buffer->type) {
1587 case PVR_BUFFER_TYPE_PUSH_CONSTS: {
1588 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1589
1590 special_buffer_entry =
1591 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1592 sizeof(*special_buffer_entry));
1593 special_buffer_entry->type =
1594 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1595 special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_PUSH_CONSTS;
1596 special_buffer_entry->buffer_index = buffer->source_offset;
1597 break;
1598 }
1599 case PVR_BUFFER_TYPE_DYNAMIC: {
1600 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1601
1602 special_buffer_entry =
1603 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1604 sizeof(*special_buffer_entry));
1605 special_buffer_entry->type =
1606 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1607 special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_DYNAMIC;
1608 special_buffer_entry->buffer_index = buffer->source_offset;
1609 break;
1610 }
1611 case PVR_BUFFER_TYPES_COMPILE_TIME: {
1612 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1613
1614 special_buffer_entry =
1615 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1616 sizeof(*special_buffer_entry));
1617 special_buffer_entry->type =
1618 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1619 special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_COMPILE_TIME;
1620 special_buffer_entry->buffer_index = compile_time_buffer_index++;
1621 break;
1622 }
1623 case PVR_BUFFER_TYPES_BUFFER_LENGTHS: {
1624 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1625
1626 special_buffer_entry =
1627 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1628 sizeof(*special_buffer_entry));
1629 special_buffer_entry->type =
1630 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1631 special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_BUFFER_LENGTHS;
1632 break;
1633 }
1634 case PVR_BUFFER_TYPE_BLEND_CONSTS: {
1635 struct pvr_const_map_entry_special_buffer *special_buffer_entry;
1636
1637 special_buffer_entry =
1638 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1639 sizeof(*special_buffer_entry));
1640 special_buffer_entry->type =
1641 PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
1642 special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BLEND_CONSTS;
1643 special_buffer_entry->buffer_index =
1644 input_program->blend_constants_used_mask;
1645 break;
1646 }
1647 case PVR_BUFFER_TYPE_UBO: {
1648 struct pvr_const_map_entry_constant_buffer *constant_buffer_entry;
1649
1650 constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1651 &entry_write_state,
1652 sizeof(*constant_buffer_entry));
1653 constant_buffer_entry->type =
1654 PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER;
1655 constant_buffer_entry->buffer_id = buffer->buffer_id;
1656 constant_buffer_entry->desc_set = buffer->desc_set;
1657 constant_buffer_entry->binding = buffer->binding;
1658 constant_buffer_entry->offset = buffer->source_offset;
1659 constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1660 break;
1661 }
1662 case PVR_BUFFER_TYPES_UBO_ZEROING: {
1663 struct pvr_const_map_entry_constant_buffer_zeroing
1664 *constant_buffer_entry;
1665
1666 constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
1667 &entry_write_state,
1668 sizeof(*constant_buffer_entry));
1669 constant_buffer_entry->type =
1670 PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING;
1671 constant_buffer_entry->buffer_id = buffer->buffer_id;
1672 constant_buffer_entry->offset = buffer->source_offset;
1673 constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
1674 break;
1675 }
1676 }
1677
1678 entry_write_state.entry->const_offset = next_const64 * 2;
1679
1680 PVR_PDS_MODE_TOGGLE(code_section,
1681 instruction,
1682 pvr_encode_burst_cs(&entry_write_state,
1683 last_DMA,
1684 halt,
1685 next_const32,
1686 next_const64,
1687 buffer->size_in_dwords,
1688 buffer->destination));
1689
1690 next_const64++;
1691 next_const32++;
1692 }
1693
1694 if (total_dma_count != running_dma_count)
1695 fprintf(stderr, "Mismatch in DMA count\n");
1696
1697 if (input_program->secondary_program_present) {
1698 struct pvr_const_map_entry_doutu_address *doutu_address;
1699
1700 PVR_PDS_MODE_TOGGLE(code_section,
1701 instruction,
1702 pvr_pds_encode_doutu(false, true, next_const64));
1703
1704 doutu_address =
1705 pvr_prepare_next_pds_const_map_entry(&entry_write_state,
1706 sizeof(*doutu_address));
1707 doutu_address->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
1708 doutu_address->const_offset = next_const64 * 2;
1709 doutu_address->doutu_control = input_program->secondary_task_control.src0;
1710
1711 next_const64++;
1712 }
1713
1714 if (instruction == 0 && input_program->must_not_be_empty) {
1715 PVR_PDS_MODE_TOGGLE(code_section,
1716 instruction,
1717 pvr_pds_inst_encode_halt(
1718 /* cc */ false));
1719 }
1720
1721 info->entry_count = entry_write_state.entry_count;
1722 info->entries_written_size_in_bytes =
1723 entry_write_state.entries_size_in_bytes;
1724 info->code_size_in_dwords = instruction;
1725 }
1726