1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <string.h>
29
30 #include "pvr_device_info.h"
31 #include "pvr_pds.h"
32 #include "pvr_rogue_pds_defs.h"
33 #include "pvr_rogue_pds_disasm.h"
34 #include "pvr_rogue_pds_encode.h"
35 #include "util/log.h"
36 #include "util/macros.h"
37
/* Extract the upper (H32) or lower (L32) 32 bits of a 64-bit value.
 *
 * The argument is widened to uint64_t first so that shifting by 32 is well
 * defined even when a narrower integer is passed, and the whole expansion is
 * parenthesized so the macros behave as a single primary expression.
 */
#define H32(X) ((uint32_t)((((uint64_t)(X)) >> 32U) & UINT64_C(0xFFFFFFFF)))
#define L32(X) ((uint32_t)(((uint64_t)(X)) & UINT64_C(0xFFFFFFFF)))
40
41 /*****************************************************************************
42 Macro definitions
43 *****************************************************************************/
44
#define PVR_PDS_DWORD_SHIFT 2

/* Bounds used by the constant and temp allocators in this file. */
#define PVR_PDS_CONSTANTS_BLOCK_BASE 0
#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
#define PVR_PDS_TEMPS_BLOCK_BASE 128
#define PVR_PDS_TEMPS_BLOCK_SIZE 32

/* Aliases: the largest ST COUNT4 / LD COUNT8 value is the field mask itself. */
#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK

/* Map PDS temp registers to the CDM values they contain. Work-group IDs are
 * only available in the coefficient sync task.
 */
#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
/* Local IDs are available in every task. */
#define PVR_PDS_CDM_LOCAL_ID_X 0
#define PVR_PDS_CDM_LOCAL_ID_YZ 1

/* DOUTW dword-mask selectors. They index dword_mask_const[], whose length is
 * PVR_PDS_DOUTW_MAXMASK.
 */
#define PVR_PDS_DOUTW_LOWER32 0x0
#define PVR_PDS_DOUTW_UPPER32 0x1
#define PVR_PDS_DOUTW_LOWER64 0x2
#define PVR_PDS_DOUTW_LOWER128 0x3
#define PVR_PDS_DOUTW_MAXMASK 0x4

#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U)
73
74 /*****************************************************************************
75 Static variables
76 *****************************************************************************/
77
78 static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
79 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
80 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
81 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
82 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
83 };
84
85 /* If has_slc_mcu_cache_control is enabled use cache_control_const[0], else use
86 * cache_control_const[1].
87 */
88 static const uint32_t cache_control_const[2][2] = {
89 { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
90 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
91 { 0, 0 }
92 };
93
94 /*****************************************************************************
95 Function definitions
96 *****************************************************************************/
97
pvr_pds_encode_ld_src0(uint64_t dest,uint64_t count8,uint64_t src_add,bool cached,const struct pvr_device_info * dev_info)98 uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
99 uint64_t count8,
100 uint64_t src_add,
101 bool cached,
102 const struct pvr_device_info *dev_info)
103 {
104 uint64_t encoded = 0;
105
106 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
107 encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
108 : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
109 }
110
111 encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
112 << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
113 encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
114 << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
115 encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
116 : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
117 encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
118 << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
119
120 return encoded;
121 }
122
pvr_pds_encode_st_src0(uint64_t src,uint64_t count4,uint64_t dst_add,bool write_through,const struct pvr_device_info * device_info)123 uint64_t pvr_pds_encode_st_src0(uint64_t src,
124 uint64_t count4,
125 uint64_t dst_add,
126 bool write_through,
127 const struct pvr_device_info *device_info)
128 {
129 uint64_t encoded = 0;
130
131 if (device_info->features.has_slc_mcu_cache_controls) {
132 encoded |= (write_through
133 ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
134 : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
135 }
136
137 encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
138 << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
139 encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
140 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
141 encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
142 : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
143 encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
144 << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
145
146 return encoded;
147 }
148
149 static ALWAYS_INLINE uint32_t
pvr_pds_encode_doutw_src1(uint32_t dest,uint32_t dword_mask,uint32_t flags,bool cached,const struct pvr_device_info * dev_info)150 pvr_pds_encode_doutw_src1(uint32_t dest,
151 uint32_t dword_mask,
152 uint32_t flags,
153 bool cached,
154 const struct pvr_device_info *dev_info)
155 {
156 assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
157 ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
158 (dword_mask < PVR_PDS_DOUTW_LOWER64));
159
160 uint32_t encoded =
161 (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
162
163 encoded |= dword_mask_const[dword_mask];
164
165 encoded |= flags;
166
167 encoded |=
168 cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
169 : 1]
170 [cached ? 1 : 0];
171 return encoded;
172 }
173
pvr_pds_encode_doutw64(uint32_t cc,uint32_t end,uint32_t src1,uint32_t src0)174 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
175 uint32_t end,
176 uint32_t src1,
177 uint32_t src0)
178 {
179 return pvr_pds_inst_encode_dout(cc,
180 end,
181 src1,
182 src0,
183 PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
184 }
185
pvr_pds_encode_doutu(uint32_t cc,uint32_t end,uint32_t src0)186 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
187 uint32_t end,
188 uint32_t src0)
189 {
190 return pvr_pds_inst_encode_dout(cc,
191 end,
192 0,
193 src0,
194 PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
195 }
196
pvr_pds_inst_encode_doutc(uint32_t cc,uint32_t end)197 static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
198 uint32_t end)
199 {
200 return pvr_pds_inst_encode_dout(cc,
201 end,
202 0,
203 0,
204 PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
205 }
206
pvr_pds_encode_doutd(uint32_t cc,uint32_t end,uint32_t src1,uint32_t src0)207 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
208 uint32_t end,
209 uint32_t src1,
210 uint32_t src0)
211 {
212 return pvr_pds_inst_encode_dout(cc,
213 end,
214 src1,
215 src0,
216 PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
217 }
218
pvr_pds_encode_douti(uint32_t cc,uint32_t end,uint32_t src0)219 static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
220 uint32_t end,
221 uint32_t src0)
222 {
223 return pvr_pds_inst_encode_dout(cc,
224 end,
225 0,
226 src0,
227 PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
228 }
229
pvr_pds_encode_bra(uint32_t srcc,uint32_t neg,uint32_t setc,int32_t relative_address)230 static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
231 uint32_t neg,
232 uint32_t setc,
233 int32_t relative_address)
234 {
235 /* Address should be signed but API only allows unsigned value. */
236 return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
237 }
238
239 /**
240 * Gets the next constant address and moves the next constant pointer along.
241 *
242 * \param next_constant Pointer to the next constant address.
243 * \param num_constants The number of constants required.
244 * \param count The number of constants allocated.
245 * \return The address of the next constant.
246 */
pvr_pds_get_constants(uint32_t * next_constant,uint32_t num_constants,uint32_t * count)247 static uint32_t pvr_pds_get_constants(uint32_t *next_constant,
248 uint32_t num_constants,
249 uint32_t *count)
250 {
251 uint32_t constant;
252
253 /* Work out starting constant number. For even number of constants, start on
254 * a 64-bit boundary.
255 */
256 if (num_constants & 1)
257 constant = *next_constant;
258 else
259 constant = (*next_constant + 1) & ~1;
260
261 /* Update the count with the number of constants actually allocated. */
262 *count += constant + num_constants - *next_constant;
263
264 /* Move the next constant pointer. */
265 *next_constant = constant + num_constants;
266
267 assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
268
269 return constant;
270 }
271
272 /**
273 * Gets the next temp address and moves the next temp pointer along.
274 *
275 * \param next_temp Pointer to the next temp address.
276 * \param num_temps The number of temps required.
277 * \param count The number of temps allocated.
278 * \return The address of the next temp.
279 */
280 static uint32_t
pvr_pds_get_temps(uint32_t * next_temp,uint32_t num_temps,uint32_t * count)281 pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
282 {
283 uint32_t temp;
284
285 /* Work out starting temp number. For even number of temps, start on a
286 * 64-bit boundary.
287 */
288 if (num_temps & 1)
289 temp = *next_temp;
290 else
291 temp = (*next_temp + 1) & ~1;
292
293 /* Update the count with the number of temps actually allocated. */
294 *count += temp + num_temps - *next_temp;
295
296 /* Move the next temp pointer. */
297 *next_temp = temp + num_temps;
298
299 assert((temp + num_temps) <=
300 (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
301
302 return temp;
303 }
304
305 /**
306 * Write a 32-bit constant indexed by the long range.
307 *
308 * \param data_block Pointer to data block to write to.
309 * \param index Index within the data to write to.
310 * \param dword The 32-bit constant to write.
311 */
312 static void
pvr_pds_write_constant32(uint32_t * data_block,uint32_t index,uint32_t dword0)313 pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
314 {
315 /* Check range. */
316 assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
317 PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));
318
319 data_block[index + 0] = dword0;
320
321 PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
322 }
323
324 /**
325 * Write a 64-bit constant indexed by the long range.
326 *
327 * \param data_block Pointer to data block to write to.
328 * \param index Index within the data to write to.
329 * \param dword0 Lower half of the 64 bit constant.
330 * \param dword1 Upper half of the 64 bit constant.
331 */
pvr_pds_write_constant64(uint32_t * data_block,uint32_t index,uint32_t dword0,uint32_t dword1)332 static void pvr_pds_write_constant64(uint32_t *data_block,
333 uint32_t index,
334 uint32_t dword0,
335 uint32_t dword1)
336 {
337 /* Has to be on 64 bit boundary. */
338 assert((index & 1) == 0);
339
340 /* Check range. */
341 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
342 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
343
344 data_block[index + 0] = dword0;
345 data_block[index + 1] = dword1;
346
347 PVR_PDS_PRINT_DATA("WriteConstant64",
348 ((uint64_t)dword0 << 32) | (uint64_t)dword1,
349 index);
350 }
351
352 /**
353 * Write a 64-bit constant from a single wide word indexed by the long-range
354 * number.
355 *
356 * \param data_block Pointer to data block to write to.
357 * \param index Index within the data to write to.
358 * \param word The 64-bit constant to write.
359 */
360
361 static void
pvr_pds_write_wide_constant(uint32_t * data_block,uint32_t index,uint64_t word)362 pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
363 {
364 /* Has to be on 64 bit boundary. */
365 assert((index & 1) == 0);
366
367 /* Check range. */
368 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
369 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
370
371 data_block[index + 0] = L32(word);
372 data_block[index + 1] = H32(word);
373
374 PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
375 }
376
pvr_pds_write_dma_address(uint32_t * data_block,uint32_t index,uint64_t address,bool coherent,const struct pvr_device_info * dev_info)377 static void pvr_pds_write_dma_address(uint32_t *data_block,
378 uint32_t index,
379 uint64_t address,
380 bool coherent,
381 const struct pvr_device_info *dev_info)
382 {
383 /* Has to be on 64 bit boundary. */
384 assert((index & 1) == 0);
385
386 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
387 address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
388
389 /* Check range. */
390 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
391 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
392
393 data_block[index + 0] = L32(address);
394 data_block[index + 1] = H32(address);
395
396 PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
397 }
398
/**
 * External API to append a 64-bit constant to an existing data segment
 * allocation.
 *
 * The allocation cursor starts at the current *data_size, so the constant is
 * placed at the first 64-bit aligned index at or after it.
 *
 * \param constants Pointer to start of data segment.
 * \param constant_value Value to write to constant.
 * \param data_size In/out number of constants allocated; grown by the two
 *                  dwords written plus any alignment padding.
 * \returns The dword index at which the constant was written.
 */
uint32_t pvr_pds_append_constant64(uint32_t *constants,
                                   uint64_t constant_value,
                                   uint32_t *data_size)
{
   /* Work on a copy of the size so the allocator can update *data_size. */
   uint32_t cursor = *data_size;
   const uint32_t slot = pvr_pds_get_constants(&cursor, 2, data_size);

   pvr_pds_write_wide_constant(constants, slot, constant_value);

   return slot;
}
421
pvr_pds_pixel_shader_sa_initialize(struct pvr_pds_pixel_shader_sa_program * program)422 void pvr_pds_pixel_shader_sa_initialize(
423 struct pvr_pds_pixel_shader_sa_program *program)
424 {
425 memset(program, 0, sizeof(*program));
426 }
427
428 /**
429 * Encode a DMA burst.
430 *
431 * \param dma_control DMA control words.
432 * \param dma_address DMA address.
433 * \param dest_offset Destination offset in the attribute.
434 * \param dma_size The size of the DMA in words.
435 * \param src_address Source address for the burst.
436 * \param dev_info PVR device info structure.
437 * \returns The number of DMA transfers required.
438 */
439
pvr_pds_encode_dma_burst(uint32_t * dma_control,uint64_t * dma_address,uint32_t dest_offset,uint32_t dma_size,uint64_t src_address,const struct pvr_device_info * dev_info)440 uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
441 uint64_t *dma_address,
442 uint32_t dest_offset,
443 uint32_t dma_size,
444 uint64_t src_address,
445 const struct pvr_device_info *dev_info)
446 {
447 /* Simplified for MS2. */
448
449 /* Force to 1 DMA. */
450 const uint32_t num_kicks = 1;
451
452 dma_control[0] = dma_size
453 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
454 dma_control[0] |= dest_offset
455 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
456
457 dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
458 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
459
460 dma_address[0] = src_address;
461 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
462 dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
463 }
464
465 return num_kicks;
466 }
467
468 /* FIXME: use the csbgen interface and pvr_csb_pack.
469 * FIXME: use bool for phase_rate_change.
470 */
471 /**
472 * Sets up the USC control words for a DOUTU.
473 *
474 * \param usc_task_control USC task control structure to be setup.
475 * \param execution_address USC execution virtual address.
476 * \param usc_temps Number of USC temps.
477 * \param sample_rate Sample rate for the DOUTU.
478 * \param phase_rate_change Phase rate change for the DOUTU.
479 */
pvr_pds_setup_doutu(struct pvr_pds_usc_task_control * usc_task_control,uint64_t execution_address,uint32_t usc_temps,uint32_t sample_rate,bool phase_rate_change)480 void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
481 uint64_t execution_address,
482 uint32_t usc_temps,
483 uint32_t sample_rate,
484 bool phase_rate_change)
485 {
486 usc_task_control->src0 = UINT64_C(0);
487
488 /* Set the execution address. */
489 pvr_set_usc_execution_address64(&(usc_task_control->src0),
490 execution_address);
491
492 if (usc_temps > 0) {
493 /* Temps are allocated in blocks of 4 dwords. */
494 usc_temps =
495 DIV_ROUND_UP(usc_temps,
496 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
497
498 /* Check for losing temps due to too many requested. */
499 assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
500 usc_temps);
501
502 usc_task_control->src0 |=
503 ((uint64_t)(usc_temps &
504 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
505 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
506 }
507
508 if (sample_rate > 0) {
509 usc_task_control->src0 |=
510 ((uint64_t)sample_rate)
511 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
512 }
513
514 if (phase_rate_change) {
515 usc_task_control->src0 |=
516 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
517 }
518 }
519
520 /**
521 * Generates the PDS pixel event program.
522 *
523 * \param program Pointer to the PDS pixel event program.
524 * \param buffer Pointer to the buffer for the program.
525 * \param gen_mode Generate either a data segment or code segment.
526 * \param dev_info PVR device info structure.
527 * \returns Pointer to just beyond the buffer for the program.
528 */
529 uint32_t *
pvr_pds_generate_pixel_event(struct pvr_pds_event_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)530 pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
531 uint32_t *restrict buffer,
532 enum pvr_pds_generate_mode gen_mode,
533 const struct pvr_device_info *dev_info)
534 {
535 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
536 uint32_t *constants = buffer;
537
538 uint32_t data_size = 0;
539
540 /* Copy the DMA control words and USC task control words to constants, then
541 * arrange them so that the 64-bit words are together followed by the 32-bit
542 * words.
543 */
544 uint32_t control_constant =
545 pvr_pds_get_constants(&next_constant, 2, &data_size);
546 uint32_t emit_constant =
547 pvr_pds_get_constants(&next_constant,
548 (2 * program->num_emit_word_pairs),
549 &data_size);
550
551 uint32_t control_word_constant =
552 pvr_pds_get_constants(&next_constant,
553 program->num_emit_word_pairs,
554 &data_size);
555
556 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
557 /* Src0 for DOUTU. */
558 pvr_pds_write_wide_constant(buffer,
559 control_constant,
560 program->task_control.src0); /* DOUTU */
561 /* 64-bit Src0. */
562
563 /* Emit words for end of tile program. */
564 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
565 pvr_pds_write_constant64(constants,
566 emit_constant + (2 * i),
567 program->emit_words[(2 * i) + 0],
568 program->emit_words[(2 * i) + 1]);
569 }
570
571 /* Control words. */
572 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
573 uint32_t doutw = pvr_pds_encode_doutw_src1(
574 (2 * i),
575 PVR_PDS_DOUTW_LOWER64,
576 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
577 false,
578 dev_info);
579
580 if (i == (program->num_emit_word_pairs - 1))
581 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
582
583 pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
584 }
585 }
586
587 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
588 /* DOUTW the state into the shared register. */
589 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
590 *buffer++ = pvr_pds_encode_doutw64(
591 /* cc */ 0,
592 /* END */ 0,
593 /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
594 /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0
595 */
596 }
597
598 /* Kick the USC. */
599 *buffer++ = pvr_pds_encode_doutu(
600 /* cc */ 0,
601 /* END */ 1,
602 /* SRC0 */ control_constant >> 1);
603 }
604
605 uint32_t code_size = 1 + program->num_emit_word_pairs;
606
607 /* Save the data segment Pointer and size. */
608 program->data_segment = constants;
609 program->data_size = data_size;
610 program->code_size = code_size;
611
612 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
613 return (constants + next_constant);
614
615 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
616 return buffer;
617
618 return NULL;
619 }
620
621 /**
622 * Checks if any of the vertex streams contains instance data.
623 *
624 * \param streams Streams contained in the vertex shader.
625 * \param num_streams Number of vertex streams.
626 * \returns true if one or more of the given vertex streams contains
627 * instance data, otherwise false.
628 */
pvr_pds_vertex_streams_contains_instance_data(const struct pvr_pds_vertex_stream * streams,uint32_t num_streams)629 static bool pvr_pds_vertex_streams_contains_instance_data(
630 const struct pvr_pds_vertex_stream *streams,
631 uint32_t num_streams)
632 {
633 for (uint32_t i = 0; i < num_streams; i++) {
634 const struct pvr_pds_vertex_stream *vertex_stream = &streams[i];
635 if (vertex_stream->instance_data)
636 return true;
637 }
638
639 return false;
640 }
641
/**
 * Allocates one or two constants for the PDS vertex shader, where the
 * constants are divided into banks of 8 dwords.
 *
 * Once the cursor has reached num_backs * 8 the request is handed to
 * pvr_pds_get_constants(). Below that limit:
 *  - cursor on a bank boundary: the constant is placed at the cursor, which
 *    then advances by 1 (single) or 8 (pair);
 *  - cursor off a boundary, single: placed at the cursor, which advances by 7;
 *  - cursor off a boundary, pair: the cursor first advances by 7 and the pair
 *    is placed there; the cursor then advances by 2 (also adding 2 to count)
 *    if it has reached the limit, or by 8 otherwise.
 *
 * \param num_backs Number of banks; the banked region spans num_backs * 8
 *                  constants.
 * \param next_constant In/out allocation cursor.
 * \param num_constants Number of constants required; must be 1 or 2.
 * \param count Running count of allocated constants. Within the banked region
 *              it is only updated in the one case described above.
 * \returns The index of the allocated constant.
 */
static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs,
                                                 uint32_t *next_constant,
                                                 uint32_t num_constants,
                                                 uint32_t *count)
{
   assert(num_constants == 1 || num_constants == 2);

   const uint32_t banked_limit = num_backs << 3;
   const uint32_t cursor = *next_constant;
   const bool single = (num_constants == 1);

   /* Past the banked region: fall back to the linear allocator. */
   if (cursor >= banked_limit)
      return pvr_pds_get_constants(next_constant, num_constants, count);

   /* On a bank boundary. */
   if ((cursor % 8) == 0) {
      *next_constant = cursor + (single ? 1 : 8);
      return cursor;
   }

   /* Off a boundary, single constant. */
   if (single) {
      *next_constant = cursor + 7;
      return cursor;
   }

   /* Off a boundary, pair of constants. */
   const uint32_t pair_start = cursor + 7;

   if (pair_start >= banked_limit) {
      *next_constant = pair_start + 2;
      *count += 2;
   } else {
      *next_constant = pair_start + 8;
   }

   return pair_start;
}
680
681 /**
682 * Generates a PDS program to load USC vertex inputs based from one or more
683 * vertex buffers, each containing potentially multiple elements, and then a
684 * DOUTU to execute the USC.
685 *
686 * \param program Pointer to the description of the program which should be
687 * generated.
688 * \param buffer Pointer to buffer that receives the output of this function.
689 * Will either be the data segment or code segment depending on
690 * gen_mode.
691 * \param gen_mode Which part to generate, either data segment or
692 * code segment. If PDS_GENERATE_SIZES is specified, nothing is
693 * written, but size information in program is updated.
694 * \param dev_info PVR device info structure.
695 * \returns Pointer to just beyond the buffer for the data - i.e the value
696 * of the buffer after writing its contents.
697 */
698 uint32_t *
pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)699 pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
700 uint32_t *restrict buffer,
701 enum pvr_pds_generate_mode gen_mode,
702 const struct pvr_device_info *dev_info)
703 {
704 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
705 uint32_t next_stream_constant;
706 uint32_t next_temp;
707 uint32_t usc_control_constant64;
708 uint32_t stride_constant32 = 0;
709 uint32_t dma_address_constant64 = 0;
710 uint32_t dma_control_constant64;
711 uint32_t multiplier_constant32 = 0;
712 uint32_t base_instance_const32 = 0;
713
714 uint32_t temp = 0;
715 uint32_t index_temp64 = 0;
716 uint32_t num_vertices_temp64 = 0;
717 uint32_t pre_index_temp = (uint32_t)(-1);
718 bool first_ddmadt = true;
719 uint32_t input_register0;
720 uint32_t input_register1;
721 uint32_t input_register2;
722
723 struct pvr_pds_vertex_stream *vertex_stream;
724 struct pvr_pds_vertex_element *vertex_element;
725 uint32_t shift_2s_comp;
726
727 uint32_t data_size = 0;
728 uint32_t code_size = 0;
729 uint32_t temps_used = 0;
730
731 bool direct_writes_needed = false;
732
733 uint32_t consts_size = 0;
734 uint32_t vertex_id_control_word_const32 = 0;
735 uint32_t instance_id_control_word_const32 = 0;
736 uint32_t instance_id_modifier_word_const32 = 0;
737 uint32_t geometry_id_control_word_const64 = 0;
738 uint32_t empty_dma_control_constant64 = 0;
739
740 bool any_instanced_stream =
741 pvr_pds_vertex_streams_contains_instance_data(program->streams,
742 program->num_streams);
743
744 uint32_t base_instance_register = 0;
745 uint32_t ddmadt_enables = 0;
746
747 bool issue_empty_ddmad = false;
748 uint32_t last_stream_index = program->num_streams - 1;
749 bool current_p0 = false;
750 uint32_t skip_stream_flag = 0;
751
752 /* Generate the PDS vertex shader data. */
753
754 #if defined(DEBUG)
755 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
756 for (uint32_t i = 0; i < program->data_size; i++)
757 buffer[i] = 0xDEADBEEF;
758 }
759 #endif
760
761 /* Generate the PDS vertex shader program */
762 next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
763 /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
764 input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
765 /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
766 input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
767
768 if (program->iterate_remap_id)
769 input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
770 else
771 input_register2 = 0; /* Not used, but need to silence the compiler. */
772
773 /* Generate the PDS vertex shader code. The constants in the data block are
774 * arranged as follows:
775 *
776 * 64 bit bank 0 64 bit bank 1 64 bit bank 2 64 bit bank
777 * 3 Not used (tmps) Stride | Multiplier Address Control
778 */
779
780 /* Find out how many constants are needed by streams. */
781 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
782 pvr_pds_get_constants(&next_constant,
783 8 * program->streams[stream].num_elements,
784 &consts_size);
785 }
786
787 /* If there are no vertex streams allocate the first bank for USC Code
788 * Address.
789 */
790 if (consts_size == 0)
791 pvr_pds_get_constants(&next_constant, 2, &consts_size);
792 else
793 next_constant = 8;
794
795 direct_writes_needed = program->iterate_instance_id ||
796 program->iterate_vtx_id || program->iterate_remap_id;
797
798 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
799 /* Evaluate what config of DDMAD should be used for each stream. */
800 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
801 vertex_stream = &program->streams[stream];
802
803 if (vertex_stream->use_ddmadt) {
804 ddmadt_enables |= (1 << stream);
805
806 /* The condition for index value is:
807 * index * stride + size <= bufferSize (all in unit of byte)
808 */
809 if (vertex_stream->stride == 0) {
810 if (vertex_stream->elements[0].size <=
811 vertex_stream->buffer_size_in_bytes) {
812 /* index can be any value -> no need to use DDMADT. */
813 ddmadt_enables &= (~(1 << stream));
814 } else {
815 /* No index works -> no need to issue DDMAD instruction.
816 */
817 skip_stream_flag |= (1 << stream);
818 }
819 } else {
820 /* index * stride + size <= bufferSize
821 *
822 * can be converted to:
823 * index <= (bufferSize - size) / stride
824 *
825 * where maximum index is:
826 * integer((bufferSize - size) / stride).
827 */
828 if (vertex_stream->buffer_size_in_bytes <
829 vertex_stream->elements[0].size) {
830 /* No index works -> no need to issue DDMAD instruction.
831 */
832 skip_stream_flag |= (1 << stream);
833 } else {
834 uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
835 vertex_stream->elements[0].size) /
836 vertex_stream->stride;
837 if (max_index == 0xFFFFFFFFu) {
838 /* No need to use DDMADT as all possible indices can
839 * pass the test.
840 */
841 ddmadt_enables &= (~(1 << stream));
842 } else {
843 /* In this case, test condition can be changed to
844 * index < max_index + 1.
845 */
846 program->streams[stream].num_vertices =
847 pvr_pds_get_bank_based_constants(program->num_streams,
848 &next_constant,
849 1,
850 &consts_size);
851
852 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
853 pvr_pds_write_constant32(
854 buffer,
855 program->streams[stream].num_vertices,
856 max_index + 1);
857 }
858 }
859 }
860 }
861 }
862
863 if ((skip_stream_flag & (1 << stream)) == 0) {
864 issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
865 last_stream_index = stream;
866 }
867 }
868 } else {
869 if (program->num_streams > 0 &&
870 program->streams[program->num_streams - 1].use_ddmadt) {
871 issue_empty_ddmad = true;
872 }
873 }
874
875 if (direct_writes_needed)
876 issue_empty_ddmad = false;
877
878 if (issue_empty_ddmad) {
879 /* An empty DMA control const (DMA size = 0) is required in case the
880 * last DDMADD is predicated out and last flag does not have any usage.
881 */
882 empty_dma_control_constant64 =
883 pvr_pds_get_bank_based_constants(program->num_streams,
884 &next_constant,
885 2,
886 &consts_size);
887 }
888
889 /* Assign constants for non stream or base instance if there is any
890 * instanced stream.
891 */
892 if (direct_writes_needed || any_instanced_stream ||
893 program->instance_ID_modifier) {
894 if (program->iterate_vtx_id) {
895 vertex_id_control_word_const32 =
896 pvr_pds_get_bank_based_constants(program->num_streams,
897 &next_constant,
898 1,
899 &consts_size);
900 }
901
902 if (program->iterate_instance_id || program->instance_ID_modifier) {
903 if (program->instance_ID_modifier == 0) {
904 instance_id_control_word_const32 =
905 pvr_pds_get_bank_based_constants(program->num_streams,
906 &next_constant,
907 1,
908 &consts_size);
909 } else {
910 instance_id_modifier_word_const32 =
911 pvr_pds_get_bank_based_constants(program->num_streams,
912 &next_constant,
913 1,
914 &consts_size);
915 if ((instance_id_modifier_word_const32 % 2) == 0) {
916 instance_id_control_word_const32 =
917 pvr_pds_get_bank_based_constants(program->num_streams,
918 &next_constant,
919 1,
920 &consts_size);
921 } else {
922 instance_id_control_word_const32 =
923 instance_id_modifier_word_const32;
924 instance_id_modifier_word_const32 =
925 pvr_pds_get_bank_based_constants(program->num_streams,
926 &next_constant,
927 1,
928 &consts_size);
929 }
930 }
931 }
932
933 if (program->base_instance != 0) {
934 base_instance_const32 =
935 pvr_pds_get_bank_based_constants(program->num_streams,
936 &next_constant,
937 1,
938 &consts_size);
939 }
940
941 if (program->iterate_remap_id) {
942 geometry_id_control_word_const64 =
943 pvr_pds_get_bank_based_constants(program->num_streams,
944 &next_constant,
945 2,
946 &consts_size);
947 }
948 }
949
950 if (program->instance_ID_modifier != 0) {
951 /* This instanceID modifier is used when a draw array instanced call
952 * sourcing from client data cannot fit into vertex buffer and needs to
953 * be broken down into several draw calls.
954 */
955
956 code_size += 1;
957
958 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
959 pvr_pds_write_constant32(buffer,
960 instance_id_modifier_word_const32,
961 program->instance_ID_modifier);
962 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
963 *buffer++ = pvr_pds_inst_encode_add32(
964 /* cc */ 0x0,
965 /* ALUM */ 0, /* Unsigned */
966 /* SNA */ 0, /* Add */
967 /* SRC0 32b */ instance_id_modifier_word_const32,
968 /* SRC1 32b */ input_register1,
969 /* DST 32b */ input_register1);
970 }
971 }
972
973 /* Adjust instanceID if necessary. */
974 if (any_instanced_stream || program->iterate_instance_id) {
975 if (program->base_instance != 0) {
976 assert(!program->draw_indirect);
977
978 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
979 pvr_pds_write_constant32(buffer,
980 base_instance_const32,
981 program->base_instance);
982 }
983
984 base_instance_register = base_instance_const32;
985 }
986
987 if (program->draw_indirect) {
988 assert((program->instance_ID_modifier == 0) &&
989 (program->base_instance == 0));
990
991 base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
992 }
993 }
994
995 next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
996 usc_control_constant64 =
997 pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
998
999 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
1000 bool instance_data_with_base_instance;
1001
1002 if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1003 ((skip_stream_flag & (1 << stream)) != 0)) {
1004 continue;
1005 }
1006
1007 vertex_stream = &program->streams[stream];
1008
1009 instance_data_with_base_instance =
1010 ((vertex_stream->instance_data) &&
1011 ((program->base_instance > 0) || (program->draw_indirect)));
1012
1013 /* Get all 8 32-bit constants at once, only 6 for first stream due to
1014 * USC constants.
1015 */
1016 if (stream == 0) {
1017 stride_constant32 =
1018 pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
1019 } else {
1020 next_constant =
1021 pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1022
1023 /* Skip bank 0. */
1024 stride_constant32 = next_constant + 2;
1025 }
1026
1027 multiplier_constant32 = stride_constant32 + 1;
1028
1029 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1030 pvr_pds_write_constant32(buffer,
1031 stride_constant32,
1032 vertex_stream->stride);
1033
1034 /* Vertex stream frequency multiplier. */
1035 if (vertex_stream->multiplier)
1036 pvr_pds_write_constant32(buffer,
1037 multiplier_constant32,
1038 vertex_stream->multiplier);
1039 }
1040
1041 /* Update the code size count and temps count for the above code
1042 * segment.
1043 */
1044 if (vertex_stream->current_state) {
1045 code_size += 1;
1046 temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
1047 } else {
1048 unsigned int num_temps_required = 0;
1049
1050 if (vertex_stream->multiplier) {
1051 num_temps_required += 2;
1052 code_size += 3;
1053
1054 if (vertex_stream->shift) {
1055 code_size += 1;
1056
1057 if ((int32_t)vertex_stream->shift > 0)
1058 code_size += 1;
1059 }
1060 } else if (vertex_stream->shift) {
1061 code_size += 1;
1062 num_temps_required += 1;
1063 } else if (instance_data_with_base_instance) {
1064 num_temps_required += 1;
1065 }
1066
1067 if (num_temps_required != 0) {
1068 temp = pvr_pds_get_temps(&next_temp,
1069 num_temps_required,
1070 &temps_used); /* 64-bit */
1071 } else {
1072 temp = vertex_stream->instance_data ? input_register1
1073 : input_register0;
1074 }
1075
1076 if (instance_data_with_base_instance)
1077 code_size += 1;
1078 }
1079
1080 /* The real code segment. */
1081 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1082 /* If it's current state stream, then index = 0 always. */
1083 if (vertex_stream->current_state) {
1084 /* Put zero in temp. */
1085 *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1086 } else if (vertex_stream->multiplier) {
1087 /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
1088 * new: Iout = (Iin * Multiplier) >> (shift+31)
1089 */
1090
1091 /* Put zero in temp. Need zero for add part of the following
1092 * MAD. MAD source is 64 bit, so need two LIMMs.
1093 */
1094 *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1095 /* Put zero in temp. Need zero for add part of the following
1096 * MAD.
1097 */
1098 *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
1099
1100 /* old: (Iin * (Multiplier+2^24))
1101 * new: (Iin * Multiplier)
1102 */
1103 *buffer++ = pvr_rogue_inst_encode_mad(
1104 0, /* Sign of add is positive. */
1105 0, /* Unsigned ALU mode */
1106 0, /* Unconditional */
1107 multiplier_constant32,
1108 vertex_stream->instance_data ? input_register1 : input_register0,
1109 temp / 2,
1110 temp / 2);
1111
1112 if (vertex_stream->shift) {
1113 int32_t shift = (int32_t)vertex_stream->shift;
1114
1115 /* new: >> (shift + 31) */
1116 shift += 31;
1117 shift *= -1;
1118
1119 if (shift < -31) {
1120 /* >> (31) */
1121 shift_2s_comp = 0xFFFE1;
1122 *buffer++ = pvr_pds_inst_encode_stflp64(
1123 /* cc */ 0,
1124 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1125 /* IM */ 1, /* enable immediate */
1126 /* SRC0 */ temp / 2,
1127 /* SRC1 */ input_register0, /* This won't be used in
1128 * a shift operation.
1129 */
1130 /* SRC2 (Shift) */ shift_2s_comp,
1131 /* DST */ temp / 2);
1132 shift += 31;
1133 }
1134
1135 /* old: >> (Shift+24)
1136 * new: >> (shift + 31)
1137 */
1138 shift_2s_comp = *((uint32_t *)&shift);
1139 *buffer++ = pvr_pds_inst_encode_stflp64(
1140 /* cc */ 0,
1141 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1142 /* IM */ 1, /*enable immediate */
1143 /* SRC0 */ temp / 2,
1144 /* SRC1 */ input_register0, /* This won't be used in
1145 * a shift operation.
1146 */
1147 /* SRC2 (Shift) */ shift_2s_comp,
1148 /* DST */ temp / 2);
1149 }
1150
1151 if (instance_data_with_base_instance) {
1152 *buffer++ =
1153 pvr_pds_inst_encode_add32(0, /* cc */
1154 0, /* ALNUM */
1155 0, /* SNA */
1156 base_instance_register, /* src0
1157 */
1158 temp, /* src1 */
1159 temp /* dst */
1160 );
1161 }
1162 } else { /* NOT vertex_stream->multiplier */
1163 if (vertex_stream->shift) {
1164 /* Shift Index/InstanceNum Right by shift bits. Put result
1165 * in a Temp.
1166 */
1167
1168 /* 2's complement of shift as this will be a right shift. */
1169 shift_2s_comp = ~(vertex_stream->shift) + 1;
1170
1171 *buffer++ = pvr_pds_inst_encode_stflp32(
1172 /* IM */ 1, /* enable immediate. */
1173 /* cc */ 0,
1174 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1175 /* SRC0 */ vertex_stream->instance_data ? input_register1
1176 : input_register0,
1177 /* SRC1 */ input_register0, /* This won't be used in
1178 * a shift operation.
1179 */
1180 /* SRC2 (Shift) */ shift_2s_comp,
1181 /* DST */ temp);
1182
1183 if (instance_data_with_base_instance) {
1184 *buffer++ =
1185 pvr_pds_inst_encode_add32(0, /* cc */
1186 0, /* ALNUM */
1187 0, /* SNA */
1188 base_instance_register, /* src0
1189 */
1190 temp, /* src1 */
1191 temp /* dst */
1192 );
1193 }
1194 } else {
1195 if (instance_data_with_base_instance) {
1196 *buffer++ =
1197 pvr_pds_inst_encode_add32(0, /* cc */
1198 0, /* ALNUM */
1199 0, /* SNA */
1200 base_instance_register, /* src0
1201 */
1202 input_register1, /* src1 */
1203 temp /* dst */
1204 );
1205 } else {
1206 /* If the shift instruction doesn't happen, use the IR
1207 * directly into the following MAD.
1208 */
1209 temp = vertex_stream->instance_data ? input_register1
1210 : input_register0;
1211 }
1212 }
1213 }
1214 }
1215
1216 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1217 if (vertex_stream->use_ddmadt)
1218 ddmadt_enables |= (1 << stream);
1219 } else {
1220 if ((ddmadt_enables & (1 << stream)) != 0) {
1221 /* Emulate what DDMADT does for range checking. */
1222 if (first_ddmadt) {
1223 /* Get a 64-bit temp so that the current index can be compared
1224 * against the allowed number of vertices.
1225 */
1226 index_temp64 =
1227 pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1228 */
1229 num_vertices_temp64 =
1230 pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1231 */
1232
1233 index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1234 num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1235
1236 code_size += 3;
1237 current_p0 = true;
1238 }
1239
1240 code_size += (temp == pre_index_temp ? 1 : 2);
1241
1242 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1243 if (first_ddmadt) {
1244 /* Set predicate to be P0. */
1245 *buffer++ = pvr_pds_encode_bra(
1246 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1247 */
1248 0, /* Neg */
1249 PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
1250 */
1251 1); /* Addr */
1252
1253 *buffer++ =
1254 pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
1255 *buffer++ =
1256 pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
1257 }
1258
1259 if (temp != pre_index_temp) {
1260 *buffer++ = pvr_pds_inst_encode_stflp32(
1261 /* IM */ 1, /* enable immediate. */
1262 /* cc */ 0,
1263 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1264 /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
1265 /* SRC1 */ 0,
1266 /* SRC2 (Shift) */ 0,
1267 /* DST */ index_temp64);
1268 }
1269
1270 *buffer++ = pvr_pds_inst_encode_stflp32(
1271 /* IM */ 1, /* enable immediate. */
1272 /* cc */ 0,
1273 /* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
1274 /* SRC0 */ num_vertices_temp64 + 1,
1275 /* SRC1 */ vertex_stream->num_vertices,
1276 /* SRC2 (Shift) */ 0,
1277 /* DST */ num_vertices_temp64);
1278 }
1279
1280 first_ddmadt = false;
1281
1282 pre_index_temp = temp;
1283 }
1284 }
1285
1286 /* Process the elements in the stream. */
1287 for (uint32_t element = 0; element < vertex_stream->num_elements;
1288 element++) {
1289 bool terminate = false;
1290
1291 vertex_element = &vertex_stream->elements[element];
1292 /* Check if last DDMAD needs terminate or not. */
1293 if ((element == (vertex_stream->num_elements - 1)) &&
1294 (stream == last_stream_index)) {
1295 terminate = !issue_empty_ddmad && !direct_writes_needed;
1296 }
1297
1298 /* Get a new set of constants for this element. */
1299 if (element) {
1300 /* Get all 8 32 bit constants at once. */
1301 next_constant =
1302 pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1303 }
1304
1305 dma_address_constant64 = next_constant + 4;
1306 dma_control_constant64 = dma_address_constant64 + 2;
1307
1308 if (vertex_element->component_size == 0) {
1309 /* Standard DMA.
1310 *
1311 * Write the DMA transfer control words into the PDS data
1312 * section.
1313 *
1314 * DMA Address is 40-bit.
1315 */
1316
1317 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1318 uint32_t dma_control_word;
1319 uint64_t dma_control_word64 = 0;
1320 uint32_t dma_size;
1321
1322 /* Write the address to the constant. */
1323 pvr_pds_write_dma_address(buffer,
1324 dma_address_constant64,
1325 vertex_stream->address +
1326 (uint64_t)vertex_element->offset,
1327 false,
1328 dev_info);
1329 {
1330 if (program->stream_patch_offsets) {
1331 program
1332 ->stream_patch_offsets[program->num_stream_patches++] =
1333 (stream << 16) | (dma_address_constant64 >> 1);
1334 }
1335 }
1336
1337 /* Size is in bytes - round up to nearest 32 bit word. */
1338 dma_size =
1339 (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
1340 PVR_PDS_DWORD_SHIFT;
1341
1342 assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
1343
1344 /* Set up the dma transfer control word. */
1345 dma_control_word =
1346 dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1347
1348 dma_control_word |=
1349 vertex_element->reg
1350 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1351
1352 dma_control_word |=
1353 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1354 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1355
1356 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1357 if ((ddmadt_enables & (1 << stream)) != 0) {
1358 assert(
1359 ((((uint64_t)vertex_stream->buffer_size_in_bytes
1360 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1361 ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
1362 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
1363 (uint64_t)vertex_stream->buffer_size_in_bytes);
1364 dma_control_word64 =
1365 (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
1366 (((uint64_t)vertex_stream->buffer_size_in_bytes
1367 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1368 ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
1369 }
1370 }
1371 /* If this is the last dma then also set the last flag. */
1372 if (terminate) {
1373 dma_control_word |=
1374 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1375 }
1376
1377 /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1378 * spec.
1379 */
1380 pvr_pds_write_wide_constant(buffer,
1381 dma_control_constant64,
1382 dma_control_word64 |
1383 (uint64_t)dma_control_word);
1384 }
1385
1386 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1387 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1388 if ((ddmadt_enables & (1 << stream)) != 0) {
1389 *buffer++ = pvr_pds_inst_encode_cmp(
1390 0, /* cc enable */
1391 PVR_ROGUE_PDSINST_COP_LT, /* Operation */
1392 index_temp64 >> 1, /* SRC0 (REGS64TP) */
1393 (num_vertices_temp64 >> 1) +
1394 PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
1395 (REGS64)
1396 */
1397 }
1398 }
1399 /* Multiply by the vertex stream stride and add the base
1400 * followed by a DOUTD.
1401 *
1402 * dmad32 (C0 * T0) + C1, C2
1403 * src0 = stride src1 = index src2 = baseaddr src3 =
1404 * doutd part
1405 */
1406
1407 uint32_t cc;
1408 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1409 cc = 0;
1410 else
1411 cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
1412
1413 *buffer++ = pvr_pds_inst_encode_ddmad(
1414 /* cc */ cc,
1415 /* END */ 0,
1416 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1417 /* SRC1 */ temp, /* Index 32-bit*/
1418 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1419 * Address
1420 * +
1421 * Offset
1422 */
1423 /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1424 * Transfer
1425 * Control
1426 * Word.
1427 */
1428 );
1429 }
1430
1431 if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1432 ((ddmadt_enables & (1 << stream)) != 0)) {
1433 code_size += 1;
1434 }
1435 code_size += 1;
1436 } else {
1437 /* Repeat DMA.
1438 *
1439 * Write the DMA transfer control words into the PDS data
1440 * section.
1441 *
1442 * DMA address is 40-bit.
1443 */
1444
1445 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1446 uint32_t dma_control_word;
1447
1448 /* Write the address to the constant. */
1449 pvr_pds_write_dma_address(buffer,
1450 dma_address_constant64,
1451 vertex_stream->address +
1452 (uint64_t)vertex_element->offset,
1453 false,
1454 dev_info);
1455
1456 /* Set up the DMA transfer control word. */
1457 dma_control_word =
1458 vertex_element->size
1459 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1460
1461 dma_control_word |=
1462 vertex_element->reg
1463 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1464
1465 switch (vertex_element->component_size) {
1466 case 4: {
1467 dma_control_word |=
1468 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
1469 break;
1470 }
1471 case 3: {
1472 dma_control_word |=
1473 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
1474 break;
1475 }
1476 case 2: {
1477 dma_control_word |=
1478 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
1479 break;
1480 }
1481 default: {
1482 dma_control_word |=
1483 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
1484 break;
1485 }
1486 }
1487
1488 dma_control_word |=
1489 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
1490
1491 dma_control_word |=
1492 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1493 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1494
1495 /* If this is the last dma then also set the last flag. */
1496 if (terminate) {
1497 dma_control_word |=
1498 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1499 }
1500
1501 /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1502 * spec.
1503 */
1504 pvr_pds_write_wide_constant(buffer,
1505 dma_control_constant64,
1506 (uint64_t)dma_control_word);
1507 }
1508
1509 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1510 /* Multiply by the vertex stream stride and add the base
1511 * followed by a DOUTD.
1512 *
1513 * dmad32 (C0 * T0) + C1, C2
1514 * src0 = stride src1 = index src2 = baseaddr src3 =
1515 * doutd part
1516 */
1517 *buffer++ = pvr_pds_inst_encode_ddmad(
1518 /* cc */ 0,
1519 /* END */ 0,
1520 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1521 /* SRC1 */ temp, /* Index 32-bit*/
1522 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1523 * Address
1524 * +
1525 * Offset.
1526 */
1527 /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1528 * Transfer
1529 * Control
1530 * Word.
1531 */
1532 );
1533 }
1534
1535 code_size += 1;
1536 } /* End of repeat DMA. */
1537 } /* Element loop */
1538 } /* Stream loop */
1539
1540 if (issue_empty_ddmad) {
1541 /* Issue an empty last DDMAD, always executed. */
1542 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1543 pvr_pds_write_wide_constant(
1544 buffer,
1545 empty_dma_control_constant64,
1546 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
1547 }
1548
1549 code_size += 1;
1550
1551 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1552 *buffer++ = pvr_pds_inst_encode_ddmad(
1553 /* cc */ 0,
1554 /* END */ 0,
1555 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1556 /* SRC1 */ temp, /* Index 32-bit*/
1557 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1558 *Address +
1559 *Offset.
1560 */
1561 /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
1562 * Transfer
1563 * Control
1564 * Word.
1565 */
1566 );
1567 }
1568 }
1569
1570 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1571 if (current_p0) {
1572 code_size += 1;
1573
1574 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1575 /* Revert predicate back to IF0 which is required by DOUTU. */
1576 *buffer++ =
1577 pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1578 */
1579 0, /* Neg */
1580 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
1581 */
1582 1); /* Addr */
1583 }
1584 }
1585 }
1586 /* Send VertexID if requested. */
1587 if (program->iterate_vtx_id) {
1588 if (program->draw_indirect) {
1589 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1590 *buffer++ = pvr_pds_inst_encode_add32(
1591 /* cc */ 0x0,
1592 /* ALUM */ 0, /* Unsigned */
1593 /* SNA */ 1, /* Minus */
1594 /* SRC0 32b */ input_register0, /* vertexID */
1595 /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
1596 * vertexID.
1597 */
1598 /* DST 32b */ input_register0);
1599 }
1600
1601 code_size += 1;
1602 }
1603
1604 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1605 uint32_t doutw = pvr_pds_encode_doutw_src1(
1606 program->vtx_id_register,
1607 PVR_PDS_DOUTW_LOWER32,
1608 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1609 false,
1610 dev_info);
1611
1612 if (!program->iterate_instance_id && !program->iterate_remap_id)
1613 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1614
1615 pvr_pds_write_constant32(buffer,
1616 vertex_id_control_word_const32,
1617 doutw);
1618 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1619 *buffer++ = pvr_pds_encode_doutw64(
1620 /* cc */ 0,
1621 /* END */ 0,
1622 /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
1623 */
1624 /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
1625 }
1626
1627 code_size += 1;
1628 }
1629
1630 /* Send InstanceID if requested. */
1631 if (program->iterate_instance_id) {
1632 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1633 uint32_t doutw = pvr_pds_encode_doutw_src1(
1634 program->instance_id_register,
1635 PVR_PDS_DOUTW_UPPER32,
1636 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1637 true,
1638 dev_info);
1639
1640 if (!program->iterate_remap_id)
1641 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1642
1643 pvr_pds_write_constant32(buffer,
1644 instance_id_control_word_const32,
1645 doutw);
1646 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1647 *buffer++ = pvr_pds_encode_doutw64(
1648 /* cc */ 0,
1649 /* END */ 0,
1650 /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
1651 /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
1652 }
1653
1654 code_size += 1;
1655 }
1656
1657 /* Send remapped index number to vi0. */
1658 if (program->iterate_remap_id) {
1659 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1660 uint32_t doutw = pvr_pds_encode_doutw_src1(
1661 0 /* vi0 */,
1662 PVR_PDS_DOUTW_LOWER32,
1663 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
1664 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
1665 false,
1666 dev_info);
1667
1668 pvr_pds_write_constant64(buffer,
1669 geometry_id_control_word_const64,
1670 doutw,
1671 0);
1672 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1673 *buffer++ = pvr_pds_encode_doutw64(
1674 /* cc */ 0,
1675 /* END */ 0,
1676 /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
1677 * Src1
1678 */
1679 /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
1680 }
1681
1682 code_size += 1;
1683 }
1684
1685 /* Copy the USC task control words to constants. */
1686 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1687 pvr_pds_write_wide_constant(buffer,
1688 usc_control_constant64,
1689 program->usc_task_control.src0); /* 64-bit
1690 * Src0
1691 */
1692 if (program->stream_patch_offsets) {
1693 /* USC TaskControl is always the first patch. */
1694 program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
1695 }
1696 }
1697
1698 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1699 /* Conditionally (if last in task) issue the task to the USC
1700 * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
1701 */
1702
1703 *buffer++ = pvr_pds_encode_doutu(
1704 /* cc */ 1,
1705 /* END */ 1,
1706 /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
1707
1708 /* End the program if the Dout did not already end it. */
1709 *buffer++ = pvr_pds_inst_encode_halt(0);
1710 }
1711
1712 code_size += 2;
1713
1714 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1715 /* Set the data segment pointer and ensure we return 1 past the buffer
1716 * ptr.
1717 */
1718 program->data_segment = buffer;
1719
1720 buffer += consts_size;
1721 }
1722
1723 program->temps_used = temps_used;
1724 program->data_size = consts_size;
1725 program->code_size = code_size;
1726 program->ddmadt_enables = ddmadt_enables;
1727 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1728 program->skip_stream_flag = skip_stream_flag;
1729
1730 return buffer;
1731 }
1732
1733 /**
1734 * Generates a PDS program to load USC compute shader global/local/workgroup
1735 * sizes/ids and then a DOUTU to execute the USC.
1736 *
1737 * \param program Pointer to description of the program that should be
1738 * generated.
1739 * \param buffer Pointer to buffer that receives the output of this function.
1740 * This will be either the data segment, or the code depending on
1741 * gen_mode.
1742 * \param gen_mode Which part to generate, either data segment or code segment.
1743 * If PDS_GENERATE_SIZES is specified, nothing is written, but
1744 * size information in program is updated.
1745 * \param dev_info PVR device info struct.
1746 * \returns Pointer to just beyond the buffer for the data - i.e. the value of
1747 * the buffer after writing its contents.
1748 */
1749 uint32_t *
pvr_pds_compute_shader(struct pvr_pds_compute_shader_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)1750 pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
1751 uint32_t *restrict buffer,
1752 enum pvr_pds_generate_mode gen_mode,
1753 const struct pvr_device_info *dev_info)
1754 {
1755 uint32_t usc_control_constant64;
1756 uint32_t usc_control_constant64_coeff_update = 0;
1757 uint32_t zero_constant64 = 0;
1758
1759 uint32_t data_size = 0;
1760 uint32_t code_size = 0;
1761 uint32_t temps_used = 0;
1762 uint32_t doutw = 0;
1763
1764 uint32_t barrier_ctrl_word = 0;
1765 uint32_t barrier_ctrl_word2 = 0;
1766
1767 /* Even though there are 3 IDs for local and global we only need max one
1768 * DOUTW for local, and two for global.
1769 */
1770 uint32_t work_group_id_ctrl_words[2] = { 0 };
1771 uint32_t local_id_ctrl_word = 0;
1772 uint32_t local_input_register;
1773
1774 /* For the constant value to load into ptemp (SW fence). */
1775 uint64_t predicate_ld_src0_constant = 0;
1776 uint32_t cond_render_negate_constant = 0;
1777
1778 uint32_t cond_render_pred_temp;
1779 uint32_t cond_render_negate_temp;
1780
1781 /* 2x 64 bit registers that will mask out the Predicate load. */
1782 uint32_t cond_render_pred_mask_constant = 0;
1783
1784 #if defined(DEBUG)
1785 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1786 for (uint32_t j = 0; j < program->data_size; j++)
1787 buffer[j] = 0xDEADBEEF;
1788 }
1789 #endif
1790
1791 /* All the compute input registers are in temps. */
1792 temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
1793
1794 uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
1795
1796 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
1797
1798 if (program->kick_usc) {
1799 /* Copy the USC task control words to constants. */
1800 usc_control_constant64 =
1801 pvr_pds_get_constants(&next_constant, 2, &data_size);
1802 }
1803
1804 if (program->has_coefficient_update_task) {
1805 usc_control_constant64_coeff_update =
1806 pvr_pds_get_constants(&next_constant, 2, &data_size);
1807 }
1808
1809 if (program->conditional_render) {
1810 predicate_ld_src0_constant =
1811 pvr_pds_get_constants(&next_constant, 2, &data_size);
1812 cond_render_negate_constant =
1813 pvr_pds_get_constants(&next_constant, 2, &data_size);
1814 cond_render_pred_mask_constant =
1815 pvr_pds_get_constants(&next_constant, 4, &data_size);
1816
1817 /* LD will load a 64 bit value. */
1818 cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
1819 cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
1820
1821 program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
1822 program->cond_render_pred_temp = cond_render_pred_temp;
1823 }
1824
1825 if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1826 (program->clear_pds_barrier) ||
1827 (program->kick_usc && program->conditional_render)) {
1828 zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
1829 }
1830
1831 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1832 barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1833 if (PVR_HAS_QUIRK(dev_info, 51210)) {
1834 barrier_ctrl_word2 =
1835 pvr_pds_get_constants(&next_constant, 1, &data_size);
1836 }
1837 }
1838
1839 if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
1840 program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1841 work_group_id_ctrl_words[0] =
1842 pvr_pds_get_constants(&next_constant, 1, &data_size);
1843 }
1844
1845 if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1846 work_group_id_ctrl_words[1] =
1847 pvr_pds_get_constants(&next_constant, 1, &data_size);
1848 }
1849
1850 if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1851 (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1852 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1853 local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1854 }
1855
1856 if (program->add_base_workgroup) {
1857 for (uint32_t workgroup_component = 0; workgroup_component < 3;
1858 workgroup_component++) {
1859 if (program->work_group_input_regs[workgroup_component] !=
1860 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1861 program
1862 ->base_workgroup_constant_offset_in_dwords[workgroup_component] =
1863 pvr_pds_get_constants(&next_constant, 1, &data_size);
1864 }
1865 }
1866 }
1867
1868 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1869 if (program->kick_usc) {
1870 /* Src0 for DOUTU */
1871 pvr_pds_write_wide_constant(buffer,
1872 usc_control_constant64,
1873 program->usc_task_control.src0); /* 64-bit
1874 * Src0.
1875 */
1876 }
1877
1878 if (program->has_coefficient_update_task) {
1879 /* Src0 for DOUTU. */
1880 pvr_pds_write_wide_constant(
1881 buffer,
1882 usc_control_constant64_coeff_update,
1883 program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
1884 }
1885
1886 if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
1887 (program->clear_pds_barrier) ||
1888 (program->kick_usc && program->conditional_render)) {
1889 pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
1890 * Src0
1891 */
1892 }
1893
1894 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1895 if (PVR_HAS_QUIRK(dev_info, 51210)) {
1896 /* Write the constant for the coefficient register write. */
1897 doutw = pvr_pds_encode_doutw_src1(
1898 program->barrier_coefficient + 4,
1899 PVR_PDS_DOUTW_LOWER64,
1900 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1901 true,
1902 dev_info);
1903 pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
1904 }
1905 /* Write the constant for the coefficient register write. */
1906 doutw = pvr_pds_encode_doutw_src1(
1907 program->barrier_coefficient,
1908 PVR_PDS_DOUTW_LOWER64,
1909 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1910 true,
1911 dev_info);
1912
1913 /* Check whether the barrier is going to be the last DOUTW done by
1914 * the coefficient sync task.
1915 */
1916 if ((program->work_group_input_regs[0] ==
1917 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1918 (program->work_group_input_regs[1] ==
1919 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
1920 (program->work_group_input_regs[2] ==
1921 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
1922 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1923 }
1924
1925 pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
1926 }
1927
1928 /* If we want work-group id X, see if we also want work-group id Y. */
1929 if (program->work_group_input_regs[0] !=
1930 PVR_PDS_COMPUTE_INPUT_REG_UNUSED &&
1931 program->work_group_input_regs[1] !=
1932 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1933 /* Make sure we are going to DOUTW them into adjacent registers
1934 * otherwise we can't do it in one.
1935 */
1936 assert(program->work_group_input_regs[1] ==
1937 (program->work_group_input_regs[0] + 1));
1938
1939 doutw = pvr_pds_encode_doutw_src1(
1940 program->work_group_input_regs[0],
1941 PVR_PDS_DOUTW_LOWER64,
1942 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1943 true,
1944 dev_info);
1945
1946 /* If we don't want the Z work-group id then this is the last one.
1947 */
1948 if (program->work_group_input_regs[2] ==
1949 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1950 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1951 }
1952
1953 pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
1954 }
1955 /* If we only want one of X or Y then handle them separately. */
1956 else {
1957 if (program->work_group_input_regs[0] !=
1958 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1959 doutw = pvr_pds_encode_doutw_src1(
1960 program->work_group_input_regs[0],
1961 PVR_PDS_DOUTW_LOWER32,
1962 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1963 true,
1964 dev_info);
1965
1966 /* If we don't want the Z work-group id then this is the last
1967 * one.
1968 */
1969 if (program->work_group_input_regs[2] ==
1970 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1971 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1972 }
1973
1974 pvr_pds_write_constant32(buffer,
1975 work_group_id_ctrl_words[0],
1976 doutw);
1977 } else if (program->work_group_input_regs[1] !=
1978 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1979 doutw = pvr_pds_encode_doutw_src1(
1980 program->work_group_input_regs[1],
1981 PVR_PDS_DOUTW_UPPER32,
1982 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1983 true,
1984 dev_info);
1985
1986 /* If we don't want the Z work-group id then this is the last
1987 * one.
1988 */
1989 if (program->work_group_input_regs[2] ==
1990 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
1991 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1992 }
1993
1994 pvr_pds_write_constant32(buffer,
1995 work_group_id_ctrl_words[0],
1996 doutw);
1997 }
1998 }
1999
2000 /* Handle work-group id Z. */
2001 if (program->work_group_input_regs[2] !=
2002 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2003 doutw = pvr_pds_encode_doutw_src1(
2004 program->work_group_input_regs[2],
2005 PVR_PDS_DOUTW_UPPER32,
2006 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
2007 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2008 true,
2009 dev_info);
2010
2011 pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
2012 }
2013
2014 /* Handle the local IDs. */
2015 if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2016 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2017 uint32_t dest_reg;
2018
2019 /* If we want local id Y and Z make sure the compiler wants them in
2020 * the same register.
2021 */
2022 if (!program->flattened_work_groups) {
2023 if ((program->local_input_regs[1] !=
2024 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2025 (program->local_input_regs[2] !=
2026 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2027 assert(program->local_input_regs[1] ==
2028 program->local_input_regs[2]);
2029 }
2030 }
2031
2032 if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2033 dest_reg = program->local_input_regs[1];
2034 else
2035 dest_reg = program->local_input_regs[2];
2036
2037 /* If we want local id X and (Y or Z) then we can do that in a
2038 * single 64-bit DOUTW.
2039 */
2040 if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2041 assert(dest_reg == (program->local_input_regs[0] + 1));
2042
2043 doutw = pvr_pds_encode_doutw_src1(
2044 program->local_input_regs[0],
2045 PVR_PDS_DOUTW_LOWER64,
2046 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2047 true,
2048 dev_info);
2049
2050 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2051
2052 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2053 }
2054 /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
2055 */
2056 else {
2057 doutw = pvr_pds_encode_doutw_src1(
2058 dest_reg,
2059 PVR_PDS_DOUTW_UPPER32,
2060 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2061 true,
2062 dev_info);
2063
2064 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2065
2066 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2067 }
2068 }
2069 /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
2070 */
2071 else if (program->local_input_regs[0] !=
2072 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2073 doutw = pvr_pds_encode_doutw_src1(
2074 program->local_input_regs[0],
2075 PVR_PDS_DOUTW_LOWER32,
2076 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
2077 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2078 true,
2079 dev_info);
2080
2081 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2082 }
2083 }
2084
2085 if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
2086 gen_mode == PDS_GENERATE_SIZES) {
2087 const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
2088 #define APPEND(X) \
2089 if (encode) { \
2090 *buffer = X; \
2091 buffer++; \
2092 } else { \
2093 code_size += sizeof(uint32_t); \
2094 }
2095
2096 /* Assert that coeff_update_task_branch_size is > 0 because if it is 0
2097 * then we will be doing an infinite loop.
2098 */
2099 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
2100 assert(program->coeff_update_task_branch_size > 0);
2101
2102 /* Test whether this is the coefficient update task or not. */
2103 APPEND(
2104 pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
2105 PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
2106 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
2107 program->coeff_update_task_branch_size /* ADDR */));
2108
2109 /* Do we need to initialize the barrier coefficient? */
2110 if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2111 if (PVR_HAS_QUIRK(dev_info, 51210)) {
2112 /* Initialize the second barrier coefficient registers to zero.
2113 */
2114 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2115 0, /* END */
2116 barrier_ctrl_word2, /* SRC1 */
2117 zero_constant64 >> 1)); /* SRC0 */
2118 }
2119 /* Initialize the coefficient register to zero. */
2120 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2121 0, /* END */
2122 barrier_ctrl_word, /* SRC1 */
2123 zero_constant64 >> 1)); /* SRC0 */
2124 }
2125
2126 if (program->add_base_workgroup) {
2127 const uint32_t temp_values[3] = { 0, 1, 3 };
2128 for (uint32_t workgroup_component = 0; workgroup_component < 3;
2129 workgroup_component++) {
2130 if (program->work_group_input_regs[workgroup_component] ==
2131 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
2132 continue;
2133
2134 APPEND(pvr_pds_inst_encode_add32(
2135 /* cc */ 0x0,
2136 /* ALUM */ 0,
2137 /* SNA */ 0,
2138 /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
2139 program->base_workgroup_constant_offset_in_dwords
2140 [workgroup_component],
2141 /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
2142 PVR_PDS_CDM_WORK_GROUP_ID_X +
2143 temp_values[workgroup_component],
2144 /* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
2145 PVR_PDS_CDM_WORK_GROUP_ID_X +
2146 temp_values[workgroup_component]));
2147 }
2148 }
2149
2150 /* If we are going to put the work-group IDs in coefficients then we
2151 * just need to do the DOUTWs.
2152 */
2153 if ((program->work_group_input_regs[0] !=
2154 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2155 (program->work_group_input_regs[1] !=
2156 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2157 uint32_t dest_reg;
2158
2159 if (program->work_group_input_regs[0] !=
2160 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2161 dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
2162 } else {
2163 dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
2164 }
2165
2166 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2167 0, /* END */
2168 work_group_id_ctrl_words[0], /* SRC1
2169 */
2170 dest_reg >> 1)); /* SRC0 */
2171 }
2172
2173 if (program->work_group_input_regs[2] !=
2174 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2175 APPEND(pvr_pds_encode_doutw64(
2176 0, /* cc */
2177 0, /* END */
2178 work_group_id_ctrl_words[1], /* SRC1 */
2179 (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
2180 1)); /* SRC0 */
2181 }
2182
2183 /* Issue the task to the USC. */
2184 if (program->kick_usc && program->has_coefficient_update_task) {
2185 APPEND(pvr_pds_encode_doutu(0, /* cc */
2186 1, /* END */
2187 usc_control_constant64_coeff_update >>
2188 1)); /* SRC0; DOUTU 64-bit Src0 */
2189 }
2190
2191 /* Encode a HALT */
2192 APPEND(pvr_pds_inst_encode_halt(0));
2193
2194 /* Set the branch size used to skip the coefficient sync task. */
2195 program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
2196
2197 /* DOUTW in the local IDs. */
2198
2199 /* If we want X and Y or Z, we only need one DOUTW. */
2200 if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
2201 ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2202 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) {
2203 local_input_register =
2204 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2205 } else {
2206 /* If we just want X. */
2207 if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2208 local_input_register =
2209 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2210 }
2211 /* If we just want Y or Z. */
2212 else if (program->local_input_regs[1] !=
2213 PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
2214 program->local_input_regs[2] !=
2215 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
2216 local_input_register =
2217 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
2218 }
2219 }
2220
2221 if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2222 (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
2223 (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
2224 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2225 0, /* END */
2226 local_id_ctrl_word, /* SRC1 */
2227 local_input_register >> 1)); /* SRC0
2228 */
2229 }
2230
2231 if (program->clear_pds_barrier) {
2232 /* Zero the persistent temp (SW fence for context switch). */
2233 APPEND(pvr_pds_inst_encode_add64(
2234 0, /* cc */
2235 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2236 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2237 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2238 (zero_constant64 >> 1), /* src0 = 0 */
2239 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2240 (zero_constant64 >> 1), /* src1 = 0 */
2241 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
2242 * ptemp64[0]
2243 */
2244 }
2245
2246 /* If this is a fence, issue the DOUTC. */
2247 if (program->fence) {
2248 APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
2249 0 /* END */));
2250 }
2251
2252 if (program->kick_usc) {
2253 if (program->conditional_render) {
2254 /* Skip if coefficient update task. */
2255 APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
2256 0,
2257 PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2258 16));
2259
2260 /* Load the predicate. */
2261 APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
2262
2263 /* Load negate constant into temp for CMP. */
2264 APPEND(pvr_pds_inst_encode_add64(
2265 0, /* cc */
2266 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2267 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2268 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2269 (cond_render_negate_constant >> 1), /* src0 = 0 */
2270 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2271 (zero_constant64 >> 1), /* src1 = 0 */
2272 PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
2273 (cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
2274 */
2275
2276 APPEND(pvr_pds_inst_encode_wdf(0));
2277
2278 for (uint32_t i = 0; i < 4; i++) {
2279 APPEND(pvr_pds_inst_encode_stflp32(
2280 1, /* enable immediate */
2281 0, /* cc */
2282 PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
2283 cond_render_pred_temp + i, /* SRC0 */
2284 cond_render_pred_mask_constant + i, /* SRC1 */
2285 0, /* SRC2 (Shift) */
2286 cond_render_pred_temp + i)); /* DST */
2287
2288 APPEND(
2289 pvr_pds_inst_encode_stflp32(1, /* enable immediate */
2290 0, /* cc */
2291 PVR_ROGUE_PDSINST_LOP_OR, /* LOP
2292 */
2293 cond_render_pred_temp + i, /* SRC0
2294 */
2295 cond_render_pred_temp, /* SRC1 */
2296 0, /* SRC2 (Shift) */
2297 cond_render_pred_temp)); /* DST */
2298 }
2299
2300 APPEND(pvr_pds_inst_encode_limm(0, /* cc */
2301 cond_render_pred_temp + 1, /* SRC1
2302 */
2303 0, /* SRC0 */
2304 0)); /* GLOBALREG */
2305
2306 APPEND(pvr_pds_inst_encode_stflp32(1, /* enable immediate */
2307 0, /* cc */
2308 PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
2309 */
2310 cond_render_pred_temp, /* SRC0 */
2311 cond_render_negate_temp, /* SRC1
2312 */
2313 0, /* SRC2 (Shift) */
2314 cond_render_pred_temp)); /* DST
2315 */
2316
2317 /* Check that the predicate is 0. */
2318 APPEND(pvr_pds_inst_encode_cmpi(
2319 0, /* cc */
2320 PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
2321 (cond_render_pred_temp >> 1) +
2322 PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
2323 0)); /* SRC1 */
2324
2325 /* If predicate is 0, skip DOUTU. */
2326 APPEND(pvr_pds_inst_encode_bra(
2327 PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
2328 P0 */
2329 0, /* NEG */
2330 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
2331 keep
2332 */
2333 2));
2334 }
2335
2336 /* Issue the task to the USC.
2337 * DoutU src1=USC Code Base address, src2=doutu word 2.
2338 */
2339 APPEND(pvr_pds_encode_doutu(1, /* cc */
2340 1, /* END */
2341 usc_control_constant64 >> 1)); /* SRC0;
2342 * DOUTU
2343 * 64-bit
2344 * Src0.
2345 */
2346 }
2347
2348 /* End the program if the Dout did not already end it. */
2349 APPEND(pvr_pds_inst_encode_halt(0));
2350 #undef APPEND
2351 }
2352
2353 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2354 /* Set the data segment pointer and ensure we return 1 past the buffer
2355 * ptr.
2356 */
2357 program->data_segment = buffer;
2358
2359 buffer += next_constant;
2360 }
2361
2362 /* Require at least one DWORD of PDS data so the program runs. */
2363 data_size = MAX2(1, data_size);
2364
2365 program->temps_used = temps_used;
2366 program->highest_temp = temps_used;
2367 program->data_size = data_size;
2368 if (gen_mode == PDS_GENERATE_SIZES)
2369 program->code_size = code_size;
2370
2371 return buffer;
2372 }
2373
2374 /**
2375 * Generates the PDS vertex shader data or code block. This program will do a
2376 * DMA into USC Constants followed by a DOUTU.
2377 *
2378 * \param program Pointer to the PDS vertex shader program.
2379 * \param buffer Pointer to the buffer for the program.
2380 * \param gen_mode Generate code or data.
2381 * \param dev_info PVR device information struct.
2382 * \returns Pointer to just beyond the code/data.
2383 */
pvr_pds_vertex_shader_sa(struct pvr_pds_vertex_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)2384 uint32_t *pvr_pds_vertex_shader_sa(
2385 struct pvr_pds_vertex_shader_sa_program *restrict program,
2386 uint32_t *restrict buffer,
2387 enum pvr_pds_generate_mode gen_mode,
2388 const struct pvr_device_info *dev_info)
2389 {
2390 uint32_t next_constant;
2391 uint32_t data_size = 0;
2392 uint32_t code_size = 0;
2393
2394 uint32_t usc_control_constant64 = 0;
2395 uint32_t dma_address_constant64 = 0;
2396 uint32_t dma_control_constant32 = 0;
2397 uint32_t doutw_value_constant64 = 0;
2398 uint32_t doutw_control_constant32 = 0;
2399 uint32_t fence_constant_word = 0;
2400 uint32_t *buffer_base;
2401 uint32_t kick_index;
2402
2403 uint32_t total_num_doutw =
2404 program->num_dword_doutw + program->num_q_word_doutw;
2405 uint32_t total_size_dma =
2406 program->num_dword_doutw + 2 * program->num_q_word_doutw;
2407
2408 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2409
2410 /* Copy the DMA control words and USC task control words to constants.
2411 *
2412 * Arrange them so that the 64-bit words are together followed by the 32-bit
2413 * words.
2414 */
2415 if (program->kick_usc) {
2416 usc_control_constant64 =
2417 pvr_pds_get_constants(&next_constant, 2, &data_size);
2418 }
2419
2420 if (program->clear_pds_barrier) {
2421 fence_constant_word =
2422 pvr_pds_get_constants(&next_constant, 2, &data_size);
2423 }
2424 dma_address_constant64 = pvr_pds_get_constants(&next_constant,
2425 2 * program->num_dma_kicks,
2426 &data_size);
2427
2428 /* Assign all unaligned constants together to avoid alignment issues caused
2429 * by pvr_pds_get_constants with even allocation sizes.
2430 */
2431 doutw_value_constant64 = pvr_pds_get_constants(
2432 &next_constant,
2433 total_size_dma + total_num_doutw + program->num_dma_kicks,
2434 &data_size);
2435 doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
2436 dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
2437
2438 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2439 buffer_base = buffer;
2440
2441 if (program->kick_usc) {
2442 /* Src0 for DOUTU. */
2443 pvr_pds_write_wide_constant(buffer_base,
2444 usc_control_constant64,
2445 program->usc_task_control.src0); /* DOUTU
2446 * 64-bit
2447 * Src0.
2448 */
2449 buffer += 2;
2450 }
2451
2452 if (program->clear_pds_barrier) {
2453 /* Encode the fence constant src0. Fence barrier is initialized to
2454 * zero.
2455 */
2456 pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
2457 buffer += 2;
2458 }
2459
2460 if (total_num_doutw > 0) {
2461 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2462 /* Write the constant for the coefficient register write. */
2463 pvr_pds_write_constant64(buffer_base,
2464 doutw_value_constant64,
2465 program->q_word_doutw_value[2 * i],
2466 program->q_word_doutw_value[2 * i + 1]);
2467 pvr_pds_write_constant32(
2468 buffer_base,
2469 doutw_control_constant32,
2470 program->q_word_doutw_control[i] |
2471 ((!program->num_dma_kicks && i == total_num_doutw - 1)
2472 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2473 : 0));
2474
2475 doutw_value_constant64 += 2;
2476 doutw_control_constant32 += 1;
2477 }
2478
2479 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2480 /* Write the constant for the coefficient register write. */
2481 pvr_pds_write_constant32(buffer_base,
2482 doutw_value_constant64,
2483 program->dword_doutw_value[i]);
2484 pvr_pds_write_constant32(
2485 buffer_base,
2486 doutw_control_constant32,
2487 program->dword_doutw_control[i] |
2488 ((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
2489 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2490 : 0));
2491
2492 doutw_value_constant64 += 1;
2493 doutw_control_constant32 += 1;
2494 }
2495
2496 buffer += total_size_dma + total_num_doutw;
2497 }
2498
2499 if (program->num_dma_kicks == 1) /* Most-common case. */
2500 {
2501 /* Src0 for DOUTD - Address. */
2502 pvr_pds_write_dma_address(buffer_base,
2503 dma_address_constant64,
2504 program->dma_address[0],
2505 false,
2506 dev_info);
2507
2508 /* Src1 for DOUTD - Control Word. */
2509 pvr_pds_write_constant32(
2510 buffer_base,
2511 dma_control_constant32,
2512 program->dma_control[0] |
2513 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2514
2515 /* Move the buffer ptr along as we will return 1 past the buffer. */
2516 buffer += 3;
2517 } else if (program->num_dma_kicks > 1) {
2518 for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
2519 kick_index++) {
2520 /* Src0 for DOUTD - Address. */
2521 pvr_pds_write_dma_address(buffer_base,
2522 dma_address_constant64,
2523 program->dma_address[kick_index],
2524 false,
2525 dev_info);
2526
2527 /* Src1 for DOUTD - Control Word. */
2528 pvr_pds_write_constant32(buffer_base,
2529 dma_control_constant32,
2530 program->dma_control[kick_index]);
2531 dma_address_constant64 += 2;
2532 dma_control_constant32 += 1;
2533 }
2534
2535 /* Src0 for DOUTD - Address. */
2536 pvr_pds_write_dma_address(buffer_base,
2537 dma_address_constant64,
2538 program->dma_address[kick_index],
2539 false,
2540 dev_info);
2541
2542 /* Src1 for DOUTD - Control Word. */
2543 pvr_pds_write_constant32(
2544 buffer_base,
2545 dma_control_constant32,
2546 program->dma_control[kick_index] |
2547 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2548
2549 buffer += 3 * program->num_dma_kicks;
2550 }
2551 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2552 if (program->clear_pds_barrier) {
2553 /* Zero the persistent temp (SW fence for context switch). */
2554 *buffer++ = pvr_pds_inst_encode_add64(
2555 0, /* cc */
2556 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2557 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2558 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2559 (fence_constant_word >> 1), /* src0 = 0 */
2560 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2561 (fence_constant_word >> 1), /* src1 = 0 */
2562 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
2563 * ptemp[0]
2564 */
2565 }
2566
2567 if (total_num_doutw > 0) {
2568 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2569 /* Set the coefficient register to data value. */
2570 *buffer++ = pvr_pds_encode_doutw64(
2571 /* cc */ 0,
2572 /* END */ !program->num_dma_kicks && !program->kick_usc &&
2573 (i == total_num_doutw - 1),
2574 /* SRC1 */ doutw_control_constant32,
2575 /* SRC0 */ doutw_value_constant64 >> 1);
2576
2577 doutw_value_constant64 += 2;
2578 doutw_control_constant32 += 1;
2579 }
2580
2581 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2582 /* Set the coefficient register to data value. */
2583 *buffer++ = pvr_pds_encode_doutw64(
2584 /* cc */ 0,
2585 /* END */ !program->num_dma_kicks && !program->kick_usc &&
2586 (i == program->num_dword_doutw - 1),
2587 /* SRC1 */ doutw_control_constant32,
2588 /* SRC0 */ doutw_value_constant64 >> 1);
2589
2590 doutw_value_constant64 += 1;
2591 doutw_control_constant32 += 1;
2592 }
2593 }
2594
2595 if (program->num_dma_kicks != 0) {
2596 /* DMA the state into the secondary attributes. */
2597
2598 if (program->num_dma_kicks == 1) /* Most-common case. */
2599 {
2600 *buffer++ = pvr_pds_encode_doutd(
2601 /* cc */ 0,
2602 /* END */ !program->kick_usc,
2603 /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
2604 /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
2605 * Src0.
2606 */
2607 } else {
2608 for (kick_index = 0; kick_index < program->num_dma_kicks;
2609 kick_index++) {
2610 *buffer++ = pvr_pds_encode_doutd(
2611 /* cc */ 0,
2612 /* END */ (!program->kick_usc) &&
2613 (kick_index + 1 == program->num_dma_kicks),
2614 /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
2615 * Src1.
2616 */
2617 /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
2618 * 64-bit
2619 * Src0.
2620 */
2621 dma_address_constant64 += 2;
2622 dma_control_constant32 += 1;
2623 }
2624 }
2625 }
2626
2627 if (program->kick_usc) {
2628 /* Kick the USC. */
2629 *buffer++ = pvr_pds_encode_doutu(
2630 /* cc */ 0,
2631 /* END */ 1,
2632 /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
2633 */
2634 }
2635
2636 if (!program->kick_usc && program->num_dma_kicks == 0 &&
2637 total_num_doutw == 0) {
2638 *buffer++ = pvr_pds_inst_encode_halt(0);
2639 }
2640 }
2641
2642 code_size = program->num_dma_kicks + total_num_doutw;
2643 if (program->clear_pds_barrier)
2644 code_size++; /* ADD64 instruction. */
2645
2646 if (program->kick_usc)
2647 code_size++;
2648
2649 /* If there are no DMAs and no USC kick then code is HALT only. */
2650 if (code_size == 0)
2651 code_size = 1;
2652
2653 program->data_size = data_size;
2654 program->code_size = code_size;
2655
2656 return buffer;
2657 }
2658
2659 /**
2660 * Writes the Uniform Data block for the PDS pixel shader secondary attributes
2661 * program.
2662 *
2663 * \param program Pointer to the PDS pixel shader secondary attributes program.
2664 * \param buffer Pointer to the buffer for the code/data.
2665 * \param gen_mode Either code or data can be generated or sizes only updated.
2666 * \returns Pointer to just beyond the buffer for the program/data.
2667 */
pvr_pds_pixel_shader_uniform_texture_code(struct pvr_pds_pixel_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)2668 uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
2669 struct pvr_pds_pixel_shader_sa_program *restrict program,
2670 uint32_t *restrict buffer,
2671 enum pvr_pds_generate_mode gen_mode)
2672 {
2673 uint32_t *instruction;
2674 uint32_t code_size = 0;
2675 uint32_t data_size = 0;
2676 uint32_t temps_used = 0;
2677 uint32_t next_constant;
2678
2679 assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
2680 0);
2681
2682 assert(gen_mode != PDS_GENERATE_DATA_SEGMENT);
2683
2684 /* clang-format off */
2685 /* Shape of code segment (note: clear is different)
2686 *
2687 * Code
2688 * +------------+
2689 * | BRA if0 |
2690 * | DOUTD |
2691 * | ... |
2692 * | DOUTD.halt |
2693 * | uniform |
2694 * | DOUTD |
2695 * | ... |
2696 * | ... |
2697 * | DOUTW |
2698 * | ... |
2699 * | ... |
2700 * | DOUTU.halt |
2701 * | HALT |
2702 * +------------+
2703 */
2704 /* clang-format on */
2705 instruction = buffer;
2706
2707 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2708
2709 /* The clear color can arrive packed in the right form in the first (or
2710 * first 2) dwords of the shared registers and the program will issue a
2711 * single doutw for this.
2712 */
2713 if (program->clear && program->packed_clear) {
2714 uint32_t color_constant1 =
2715 pvr_pds_get_constants(&next_constant, 2, &data_size);
2716
2717 uint32_t control_word_constant1 =
2718 pvr_pds_get_constants(&next_constant, 2, &data_size);
2719
2720 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2721 /* DOUTW the clear color to the USC constants. Predicate with
2722 * uniform loading flag (IF0).
2723 */
2724 *instruction++ = pvr_pds_encode_doutw64(
2725 /* cc */ 1, /* Only for uniform loading program. */
2726 /* END */ program->kick_usc ? 0 : 1, /* Last
2727 * instruction
2728 * for a clear.
2729 */
2730 /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2731 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2732
2733 code_size += 1;
2734 }
2735 } else if (program->clear) {
2736 uint32_t color_constant1, color_constant2;
2737
2738 if (program->clear_color_dest_reg & 0x1) {
2739 uint32_t color_constant3, control_word_constant1,
2740 control_word_constant2, color_constant4;
2741
2742 color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2743 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2744 color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2745
2746 control_word_constant1 =
2747 pvr_pds_get_constants(&next_constant, 2, &data_size);
2748 control_word_constant2 =
2749 pvr_pds_get_constants(&next_constant, 2, &data_size);
2750 color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2751
2752 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2753 /* DOUTW the clear color to the USSE constants. Predicate with
2754 * uniform loading flag (IF0).
2755 */
2756 *instruction++ = pvr_pds_encode_doutw64(
2757 /* cc */ 1, /* Only for Uniform Loading program */
2758 /* END */ 0,
2759 /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2760 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2761
2762 *instruction++ = pvr_pds_encode_doutw64(
2763 /* cc */ 1, /* Only for Uniform Loading program */
2764 /* END */ 0,
2765 /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
2766 /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2767
2768 *instruction++ = pvr_pds_encode_doutw64(
2769 /* cc */ 1, /* Only for uniform loading program */
2770 /* END */ program->kick_usc ? 0 : 1, /* Last
2771 * instruction
2772 * for a clear.
2773 */
2774 /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
2775 /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
2776 }
2777
2778 code_size += 3;
2779 } else {
2780 uint32_t control_word_constant, control_word_last_constant;
2781
2782 /* Put the clear color and control words into the first 8
2783 * constants.
2784 */
2785 color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2786 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2787 control_word_constant =
2788 pvr_pds_get_constants(&next_constant, 2, &data_size);
2789 control_word_last_constant =
2790 pvr_pds_get_constants(&next_constant, 2, &data_size);
2791
2792 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2793 /* DOUTW the clear color to the USSE constants. Predicate with
2794 * uniform loading flag (IF0).
2795 */
2796 *instruction++ = pvr_pds_encode_doutw64(
2797 /* cc */ 1, /* Only for Uniform Loading program */
2798 /* END */ 0,
2799 /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
2800 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2801
2802 *instruction++ = pvr_pds_encode_doutw64(
2803 /* cc */ 1, /* Only for uniform loading program */
2804 /* END */ program->kick_usc ? 0 : 1, /* Last
2805 * instruction
2806 * for a clear.
2807 */
2808 /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1
2809 */
2810 /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2811 }
2812
2813 code_size += 2;
2814 }
2815
2816 if (program->kick_usc) {
2817 uint32_t doutu_constant64;
2818
2819 doutu_constant64 =
2820 pvr_pds_get_constants(&next_constant, 2, &data_size);
2821
2822 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2823 /* Issue the task to the USC.
2824 *
2825 * dout ds1[constant_use], ds0[constant_use],
2826 * ds1[constant_use], emit
2827 */
2828 *instruction++ = pvr_pds_encode_doutu(
2829 /* cc */ 0,
2830 /* END */ 1,
2831 /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0
2832 */
2833 }
2834
2835 code_size += 1;
2836 }
2837
2838 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2839 /* End the program. */
2840 *instruction++ = pvr_pds_inst_encode_halt(0);
2841 }
2842 code_size += 1;
2843 } else {
2844 uint32_t total_num_doutw =
2845 program->num_dword_doutw + program->num_q_word_doutw;
2846 bool both_textures_and_uniforms =
2847 ((program->num_texture_dma_kicks > 0) &&
2848 ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
2849 program->kick_usc));
2850 uint32_t doutu_constant64 = 0;
2851
2852 if (both_textures_and_uniforms) {
2853 /* If the size of a PDS data section is 0, the hardware won't run
2854 * it. We therefore don't need to branch when there is only a
2855 * texture OR a uniform update program.
2856 */
2857 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2858 uint32_t branch_address =
2859 MAX2(1 + program->num_texture_dma_kicks, 2);
2860
2861 /* Use If0 to BRAnch to uniform code. */
2862 *instruction++ = pvr_pds_encode_bra(
2863 /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
2864 /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
2865 /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2866 /* ADDR */ branch_address);
2867 }
2868
2869 code_size += 1;
2870 }
2871
2872 if (program->num_texture_dma_kicks > 0) {
2873 uint32_t dma_address_constant64;
2874 uint32_t dma_control_constant32;
2875 /* Allocate 3 constant spaces for each kick. The 64-bit constants
2876 * come first followed by the 32-bit constants.
2877 */
2878 dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
2879 dma_control_constant32 =
2880 dma_address_constant64 + (program->num_texture_dma_kicks * 2);
2881
2882 for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
2883 code_size += 1;
2884 if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
2885 continue;
2886
2887 /* DMA the state into the secondary attributes. */
2888 *instruction++ = pvr_pds_encode_doutd(
2889 /* cc */ 0,
2890 /* END */ dma == (program->num_texture_dma_kicks - 1),
2891 /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
2892 /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2893 * 64-bit
2894 * Src0
2895 */
2896 dma_address_constant64 += 2;
2897 dma_control_constant32 += 1;
2898 }
2899 } else if (both_textures_and_uniforms) {
2900 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2901 /* End the program. */
2902 *instruction++ = pvr_pds_inst_encode_halt(0);
2903 }
2904
2905 code_size += 1;
2906 }
2907
2908 /* Reserve space at the beginning of the data segment for the DOUTU Task
2909 * Control if one is needed.
2910 */
2911 if (program->kick_usc) {
2912 doutu_constant64 =
2913 pvr_pds_get_constants(&next_constant, 2, &data_size);
2914 }
2915
2916 /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
2917 * 64-bit constants come first followed by the 32-bit constants.
2918 */
2919 uint32_t total_size_dma =
2920 program->num_dword_doutw + 2 * program->num_q_word_doutw;
2921
2922 uint32_t dma_address_constant64 = pvr_pds_get_constants(
2923 &next_constant,
2924 program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
2925 &data_size);
2926 uint32_t doutw_value_constant64 =
2927 dma_address_constant64 + program->num_uniform_dma_kicks * 2;
2928 uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
2929 uint32_t doutw_control_constant32 =
2930 dma_control_constant32 + program->num_uniform_dma_kicks;
2931
2932 if (total_num_doutw > 0) {
2933 pvr_pds_get_constants(&next_constant, 0, &data_size);
2934
2935 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2936 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2937 /* Set the coefficient register to data value. */
2938 *instruction++ = pvr_pds_encode_doutw64(
2939 /* cc */ 0,
2940 /* END */ !program->num_uniform_dma_kicks &&
2941 !program->kick_usc && (i == total_num_doutw - 1),
2942 /* SRC1 */ doutw_control_constant32,
2943 /* SRC0 */ doutw_value_constant64 >> 1);
2944
2945 doutw_value_constant64 += 2;
2946 doutw_control_constant32 += 1;
2947 }
2948
2949 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2950 /* Set the coefficient register to data value. */
2951 *instruction++ = pvr_pds_encode_doutw64(
2952 /* cc */ 0,
2953 /* END */ !program->num_uniform_dma_kicks &&
2954 !program->kick_usc && (i == program->num_dword_doutw - 1),
2955 /* SRC1 */ doutw_control_constant32,
2956 /* SRC0 */ doutw_value_constant64 >> 1);
2957
2958 doutw_value_constant64 += 1;
2959 doutw_control_constant32 += 1;
2960 }
2961 }
2962 code_size += total_num_doutw;
2963 }
2964
2965 if (program->num_uniform_dma_kicks > 0) {
2966 for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
2967 code_size += 1;
2968
2969 if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
2970 continue;
2971
2972 bool last_instruction = false;
2973 if (!program->kick_usc &&
2974 (dma == program->num_uniform_dma_kicks - 1)) {
2975 last_instruction = true;
2976 }
2977 /* DMA the state into the secondary attributes. */
2978 *instruction++ = pvr_pds_encode_doutd(
2979 /* cc */ 0,
2980 /* END */ last_instruction,
2981 /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1
2982 */
2983 /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2984 * 64-bit
2985 * Src0
2986 */
2987 dma_address_constant64 += 2;
2988 dma_control_constant32 += 1;
2989 }
2990 }
2991
2992 if (program->kick_usc) {
2993 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
2994 /* Issue the task to the USC.
2995 *
2996 * dout ds1[constant_use], ds0[constant_use],
2997 * ds1[constant_use], emit
2998 */
2999
3000 *instruction++ = pvr_pds_encode_doutu(
3001 /* cc */ 0,
3002 /* END */ 1,
3003 /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
3004 }
3005
3006 code_size += 1;
3007 } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
3008 if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3009 /* End the program. */
3010 *instruction++ = pvr_pds_inst_encode_halt(0);
3011 }
3012
3013 code_size += 1;
3014 }
3015 }
3016
3017 /* Minimum temp count is 1. */
3018 program->temps_used = MAX2(temps_used, 1);
3019 program->code_size = code_size;
3020
3021 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
3022 return instruction;
3023 else
3024 return NULL;
3025 }
3026
3027 /**
3028 * Writes the Uniform Data block for the PDS pixel shader secondary attributes
3029 * program.
3030 *
3031 * \param program Pointer to the PDS pixel shader secondary attributes program.
3032 * \param buffer Pointer to the buffer for the code/data.
3033 * \param gen_mode Either code or data can be generated or sizes only updated.
3034 * \param dev_info PVR device information struct.
3035 * \returns Pointer to just beyond the buffer for the program/data.
3036 */
pvr_pds_pixel_shader_uniform_texture_data(struct pvr_pds_pixel_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,bool uniform,const struct pvr_device_info * dev_info)3037 uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
3038 struct pvr_pds_pixel_shader_sa_program *restrict program,
3039 uint32_t *restrict buffer,
3040 enum pvr_pds_generate_mode gen_mode,
3041 bool uniform,
3042 const struct pvr_device_info *dev_info)
3043 {
3044 uint32_t *constants = buffer;
3045 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3046 uint32_t temps_used = 0;
3047 uint32_t data_size = 0;
3048
3049 assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
3050 0);
3051
3052 assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);
3053
3054 /* Shape of data segment (note: clear is different).
3055 *
3056 * Uniform Texture
3057 * +--------------+ +-------------+
3058 * | USC Task L | | USC Task L |
3059 * | H | | H |
3060 * | DMA1 Src0 L | | DMA1 Src0 L |
3061 * | H | | H |
3062 * | DMA2 Src0 L | | |
3063 * | H | | |
3064 * | DMA1 Src1 | | DMA1 Src1 |
3065 * | DMA2 Src1 | | |
3066 * | DOUTW0 Src1 | | |
3067 * | DOUTW1 Src1 | | |
3068 * | ... | | |
3069 * | DOUTWn Srcn | | |
3070 * | other data | | |
3071 * +--------------+ +-------------+
3072 */
3073
3074 /* Generate the PDS pixel shader secondary attributes data.
3075 *
3076 * Packed Clear
3077 * The clear color can arrive packed in the right form in the first (or
3078 * first 2) dwords of the shared registers and the program will issue a
3079 * single DOUTW for this.
3080 */
3081 if (program->clear && uniform && program->packed_clear) {
3082 uint32_t color_constant1 =
3083 pvr_pds_get_constants(&next_constant, 2, &data_size);
3084
3085 uint32_t control_word_constant1 =
3086 pvr_pds_get_constants(&next_constant, 2, &data_size);
3087
3088 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3089 uint32_t doutw;
3090
3091 pvr_pds_write_constant64(constants,
3092 color_constant1,
3093 program->clear_color[0],
3094 program->clear_color[1]);
3095
3096 /* Load into first constant in common store. */
3097 doutw = pvr_pds_encode_doutw_src1(
3098 program->clear_color_dest_reg,
3099 PVR_PDS_DOUTW_LOWER64,
3100 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3101 false,
3102 dev_info);
3103
3104 /* Set the last flag. */
3105 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3106 pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
3107 }
3108 } else if (program->clear && uniform) {
3109 uint32_t color_constant1, color_constant2;
3110
3111 if (program->clear_color_dest_reg & 0x1) {
3112 uint32_t color_constant3, control_word_constant1,
3113 control_word_constant2, color_constant4;
3114
3115 color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3116 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3117 color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
3118
3119 control_word_constant1 =
3120 pvr_pds_get_constants(&next_constant, 2, &data_size);
3121 control_word_constant2 =
3122 pvr_pds_get_constants(&next_constant, 2, &data_size);
3123 color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3124
3125 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3126 uint32_t doutw;
3127
3128 pvr_pds_write_constant32(constants,
3129 color_constant1,
3130 program->clear_color[0]);
3131
3132 pvr_pds_write_constant64(constants,
3133 color_constant2,
3134 program->clear_color[1],
3135 program->clear_color[2]);
3136
3137 pvr_pds_write_constant32(constants,
3138 color_constant3,
3139 program->clear_color[3]);
3140
3141 /* Load into first constant in common store. */
3142 doutw = pvr_pds_encode_doutw_src1(
3143 program->clear_color_dest_reg,
3144 PVR_PDS_DOUTW_LOWER32,
3145 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3146 false,
3147 dev_info);
3148
3149 pvr_pds_write_constant64(constants,
3150 control_word_constant1,
3151 doutw,
3152 0);
3153
3154 /* Move the destination register along. */
3155 doutw = pvr_pds_encode_doutw_src1(
3156 program->clear_color_dest_reg + 1,
3157 PVR_PDS_DOUTW_LOWER64,
3158 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3159 false,
3160 dev_info);
3161
3162 pvr_pds_write_constant64(constants,
3163 control_word_constant2,
3164 doutw,
3165 0);
3166
3167 /* Move the destination register along. */
3168 doutw = pvr_pds_encode_doutw_src1(
3169 program->clear_color_dest_reg + 3,
3170 PVR_PDS_DOUTW_LOWER32,
3171 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3172 false,
3173 dev_info);
3174
3175 /* Set the last flag. */
3176 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3177 pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
3178 }
3179 } else {
3180 uint32_t control_word_constant, control_word_last_constant;
3181
3182 /* Put the clear color and control words into the first 8
3183 * constants.
3184 */
3185 color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3186 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
3187 control_word_constant =
3188 pvr_pds_get_constants(&next_constant, 2, &data_size);
3189 control_word_last_constant =
3190 pvr_pds_get_constants(&next_constant, 2, &data_size);
3191
3192 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3193 uint32_t doutw;
3194 pvr_pds_write_constant64(constants,
3195 color_constant1,
3196 program->clear_color[0],
3197 program->clear_color[1]);
3198
3199 pvr_pds_write_constant64(constants,
3200 color_constant2,
3201 program->clear_color[2],
3202 program->clear_color[3]);
3203
3204 /* Load into first constant in common store. */
3205 doutw = pvr_pds_encode_doutw_src1(
3206 program->clear_color_dest_reg,
3207 PVR_PDS_DOUTW_LOWER64,
3208 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3209 false,
3210 dev_info);
3211
3212 pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);
3213
3214 /* Move the destination register along. */
3215 doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
3216 doutw |= (program->clear_color_dest_reg + 2)
3217 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
3218
3219 /* Set the last flag. */
3220 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3221 pvr_pds_write_constant64(constants,
3222 control_word_last_constant,
3223 doutw,
3224 0);
3225 }
3226 }
3227
3228 /* Constants for the DOUTU Task Control, if needed. */
3229 if (program->kick_usc) {
3230 uint32_t doutu_constant64 =
3231 pvr_pds_get_constants(&next_constant, 2, &data_size);
3232
3233 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3234 pvr_pds_write_wide_constant(
3235 constants,
3236 doutu_constant64,
3237 program->usc_task_control.src0); /* 64-bit
3238 */
3239 /* Src0 */
3240 }
3241 }
3242 } else {
3243 if (uniform) {
3244 /* Reserve space at the beginning of the data segment for the DOUTU
3245 * Task Control if one is needed.
3246 */
3247 if (program->kick_usc) {
3248 uint32_t doutu_constant64 =
3249 pvr_pds_get_constants(&next_constant, 2, &data_size);
3250
3251 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3252 pvr_pds_write_wide_constant(
3253 constants,
3254 doutu_constant64,
3255 program->usc_task_control.src0); /* 64-bit Src0 */
3256 }
3257 }
3258
3259 uint32_t total_num_doutw =
3260 program->num_dword_doutw + program->num_q_word_doutw;
3261 uint32_t total_size_dma =
3262 program->num_dword_doutw + 2 * program->num_q_word_doutw;
3263
3264 /* Allocate 3 constant spaces for each kick. The 64-bit constants
3265 * come first followed by the 32-bit constants.
3266 */
3267 uint32_t dma_address_constant64 =
3268 pvr_pds_get_constants(&next_constant,
3269 program->num_uniform_dma_kicks * 3 +
3270 total_size_dma + total_num_doutw,
3271 &data_size);
3272 uint32_t doutw_value_constant64 =
3273 dma_address_constant64 + program->num_uniform_dma_kicks * 2;
3274 uint32_t dma_control_constant32 =
3275 doutw_value_constant64 + total_size_dma;
3276 uint32_t doutw_control_constant32 =
3277 dma_control_constant32 + program->num_uniform_dma_kicks;
3278
3279 if (total_num_doutw > 0) {
3280 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3281 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
3282 pvr_pds_write_constant64(
3283 constants,
3284 doutw_value_constant64,
3285 program->q_word_doutw_value[2 * i],
3286 program->q_word_doutw_value[2 * i + 1]);
3287 pvr_pds_write_constant32(
3288 constants,
3289 doutw_control_constant32,
3290 program->q_word_doutw_control[i] |
3291 ((!program->num_uniform_dma_kicks &&
3292 i == total_num_doutw - 1)
3293 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3294 : 0));
3295
3296 doutw_value_constant64 += 2;
3297 doutw_control_constant32 += 1;
3298 }
3299
3300 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
3301 pvr_pds_write_constant32(constants,
3302 doutw_value_constant64,
3303 program->dword_doutw_value[i]);
3304 pvr_pds_write_constant32(
3305 constants,
3306 doutw_control_constant32,
3307 program->dword_doutw_control[i] |
3308 ((!program->num_uniform_dma_kicks &&
3309 i == program->num_dword_doutw - 1)
3310 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
3311 : 0));
3312
3313 doutw_value_constant64 += 1;
3314 doutw_control_constant32 += 1;
3315 }
3316 }
3317 }
3318
3319 if (program->num_uniform_dma_kicks > 0) {
3320 uint32_t kick;
3321
3322 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3323 for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
3324 kick++) {
3325 /* Copy the dma control words to constants. */
3326 pvr_pds_write_dma_address(constants,
3327 dma_address_constant64,
3328 program->uniform_dma_address[kick],
3329 false,
3330 dev_info);
3331 pvr_pds_write_constant32(constants,
3332 dma_control_constant32,
3333 program->uniform_dma_control[kick]);
3334
3335 dma_address_constant64 += 2;
3336 dma_control_constant32 += 1;
3337 }
3338
3339 pvr_pds_write_dma_address(constants,
3340 dma_address_constant64,
3341 program->uniform_dma_address[kick],
3342 false,
3343 dev_info);
3344 pvr_pds_write_constant32(
3345 constants,
3346 dma_control_constant32,
3347 program->uniform_dma_control[kick] |
3348 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3349 }
3350 }
3351
3352 } else if (program->num_texture_dma_kicks > 0) {
3353 /* Allocate 3 constant spaces for each kick. The 64-bit constants
3354 * come first followed by the 32-bit constants.
3355 */
3356 uint32_t dma_address_constant64 =
3357 pvr_pds_get_constants(&next_constant,
3358 program->num_texture_dma_kicks * 3,
3359 &data_size);
3360 uint32_t dma_control_constant32 =
3361 dma_address_constant64 + (program->num_texture_dma_kicks * 2);
3362
3363 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3364 uint32_t kick;
3365 for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
3366 /* Copy the DMA control words to constants. */
3367 pvr_pds_write_dma_address(constants,
3368 dma_address_constant64,
3369 program->texture_dma_address[kick],
3370 false,
3371 dev_info);
3372
3373 pvr_pds_write_constant32(constants,
3374 dma_control_constant32,
3375 program->texture_dma_control[kick]);
3376
3377 dma_address_constant64 += 2;
3378 dma_control_constant32 += 1;
3379 }
3380
3381 pvr_pds_write_dma_address(constants,
3382 dma_address_constant64,
3383 program->texture_dma_address[kick],
3384 false,
3385 dev_info);
3386
3387 pvr_pds_write_constant32(
3388 constants,
3389 dma_control_constant32,
3390 program->texture_dma_control[kick] |
3391 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
3392 }
3393 }
3394 }
3395
3396 /* Save the data segment pointer and size. */
3397 program->data_segment = constants;
3398
3399 /* Minimum temp count is 1. */
3400 program->temps_used = MAX2(temps_used, 1);
3401 program->data_size = data_size;
3402
3403 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3404 return (constants + next_constant);
3405 else
3406 return NULL;
3407 }
3408
3409 /**
3410 * Generates generic DOUTC PDS program.
3411 *
3412 * \param program Pointer to the PDS kick USC.
3413 * \param buffer Pointer to the buffer for the program.
3414 * \param gen_mode Either code and data can be generated, or sizes only updated.
3415 * \returns Pointer to just beyond the buffer for the code or program segment.
3416 */
pvr_pds_generate_doutc(struct pvr_pds_fence_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)3417 uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
3418 uint32_t *restrict buffer,
3419 enum pvr_pds_generate_mode gen_mode)
3420 {
3421 uint32_t constant = 0;
3422
3423 /* Automatically get a data size of 1x 128bit chunks. */
3424 uint32_t data_size = 0, code_size = 0;
3425
3426 /* Setup the data part. */
3427 uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3428 uint32_t *instruction = buffer;
3429 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3430 * dwords.
3431 */
3432
3433 /* Update the program sizes. */
3434 program->data_size = data_size;
3435 program->code_size = code_size;
3436 program->data_segment = constants;
3437
3438 if (gen_mode == PDS_GENERATE_SIZES)
3439 return NULL;
3440
3441 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3442 /* Copy the USC task control words to constants. */
3443
3444 constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
3445 pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
3446 * Src0
3447 */
3448
3449 uint32_t control_word_constant =
3450 pvr_pds_get_constants(&next_constant, 2, &data_size);
3451 pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
3452 * Src1
3453 */
3454
3455 program->data_size = data_size;
3456 buffer += data_size;
3457
3458 return buffer;
3459 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3460 *instruction++ = pvr_pds_inst_encode_doutc(
3461 /* cc */ 0,
3462 /* END */ 0);
3463
3464 code_size++;
3465
3466 /* End the program. */
3467 *instruction++ = pvr_pds_inst_encode_halt(0);
3468 code_size++;
3469
3470 program->code_size = code_size;
3471 }
3472
3473 return instruction;
3474 }
3475
3476 /**
3477 * Generates generic kick DOUTU PDS program in a single data+code block.
3478 *
3479 * \param control Pointer to the PDS kick USC.
3480 * \param buffer Pointer to the buffer for the program.
3481 * \param gen_mode Either code and data can be generated or sizes only updated.
3482 * \param dev_info PVR device information structure.
3483 * \returns Pointer to just beyond the buffer for the code or program segment.
3484 */
pvr_pds_generate_doutw(struct pvr_pds_doutw_control * restrict control,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3485 uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
3486 uint32_t *restrict buffer,
3487 enum pvr_pds_generate_mode gen_mode,
3488 const struct pvr_device_info *dev_info)
3489 {
3490 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3491 uint32_t doutw;
3492 uint32_t data_size = 0, code_size = 0;
3493 uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3494 uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3495
3496 /* Assert if buffer is exceeded. */
3497 assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
3498
3499 uint32_t *constants = buffer;
3500 uint32_t *instruction = buffer;
3501
3502 /* Put the constants and control words interleaved in the data region. */
3503 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3504 const_pair++) {
3505 constant[const_pair] =
3506 pvr_pds_get_constants(&next_constant, 2, &data_size);
3507 control_word_constant[const_pair] =
3508 pvr_pds_get_constants(&next_constant, 2, &data_size);
3509 }
3510
3511 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3512 /* Data segment points to start of constants. */
3513 control->data_segment = constants;
3514
3515 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3516 const_pair++) {
3517 pvr_pds_write_constant64(constants,
3518 constant[const_pair],
3519 H32(control->doutw_data[const_pair]),
3520 L32(control->doutw_data[const_pair]));
3521
3522 /* Start loading at offset 0. */
3523 if (control->dest_store == PDS_COMMON_STORE) {
3524 doutw = pvr_pds_encode_doutw_src1(
3525 (2 * const_pair),
3526 PVR_PDS_DOUTW_LOWER64,
3527 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3528 false,
3529 dev_info);
3530 } else {
3531 doutw = pvr_pds_encode_doutw_src1(
3532 (2 * const_pair),
3533 PVR_PDS_DOUTW_LOWER64,
3534 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
3535 false,
3536 dev_info);
3537 }
3538
3539 if (const_pair + 1 == control->num_const64) {
3540 /* Set the last flag for the MCU (assume there are no following
3541 * DOUTD's).
3542 */
3543 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3544 }
3545 pvr_pds_write_constant64(constants,
3546 control_word_constant[const_pair],
3547 doutw,
3548 0);
3549 }
3550
3551 control->data_size = data_size;
3552 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3553 /* Code section. */
3554
3555 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3556 const_pair++) {
3557 /* DOUTW the PDS data to the USC constants. */
3558 *instruction++ = pvr_pds_encode_doutw64(
3559 /* cc */ 0,
3560 /* END */ control->last_instruction &&
3561 (const_pair + 1 == control->num_const64),
3562 /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
3563 * Src1.
3564 */
3565 /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
3566
3567 code_size++;
3568 }
3569
3570 if (control->last_instruction) {
3571 /* End the program. */
3572 *instruction++ = pvr_pds_inst_encode_halt(0);
3573 code_size++;
3574 }
3575
3576 control->code_size = code_size;
3577 }
3578
3579 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3580 return (constants + next_constant);
3581 else
3582 return instruction;
3583 }
3584
3585 /**
3586 * Generates generic kick DOUTU PDS program in a single data+code block.
3587 *
3588 * \param program Pointer to the PDS kick USC.
3589 * \param buffer Pointer to the buffer for the program.
3590 * \param start_next_constant Next constant in data segment. Non-zero if another
3591 * instruction precedes the DOUTU.
3592 * \param cc_enabled If true then the DOUTU is predicated (cc set).
3593 * \param gen_mode Either code and data can be generated or sizes only updated.
3594 * \returns Pointer to just beyond the buffer for the code or program segment.
3595 */
pvr_pds_kick_usc(struct pvr_pds_kickusc_program * restrict program,uint32_t * restrict buffer,uint32_t start_next_constant,bool cc_enabled,enum pvr_pds_generate_mode gen_mode)3596 uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
3597 uint32_t *restrict buffer,
3598 uint32_t start_next_constant,
3599 bool cc_enabled,
3600 enum pvr_pds_generate_mode gen_mode)
3601 {
3602 uint32_t constant = 0;
3603
3604 /* Automatically get a data size of 2 128bit chunks. */
3605 uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
3606 uint32_t code_size = 1; /* Single doutu */
3607 uint32_t dummy_count = 0;
3608
3609 /* Setup the data part. */
3610 uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3611 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3612 * dwords.
3613 */
3614
3615 /* Update the program sizes. */
3616 program->data_size = data_size;
3617 program->code_size = code_size;
3618 program->data_segment = constants;
3619
3620 if (gen_mode == PDS_GENERATE_SIZES)
3621 return NULL;
3622
3623 if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
3624 gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3625 /* Copy the USC task control words to constants. */
3626
3627 constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
3628
3629 pvr_pds_write_wide_constant(constants,
3630 constant + 0,
3631 program->usc_task_control.src0); /* 64-bit
3632 * Src0.
3633 */
3634 buffer += data_size;
3635
3636 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3637 return buffer;
3638 }
3639
3640 if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
3641 gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3642 /* Generate the PDS pixel shader code. */
3643
3644 /* Setup the instruction pointer. */
3645 uint32_t *instruction = buffer;
3646
3647 /* Issue the task to the USC.
3648 *
3649 * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
3650 * halt halt
3651 */
3652
3653 *instruction++ = pvr_pds_encode_doutu(
3654 /* cc */ cc_enabled,
3655 /* END */ 1,
3656 /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
3657 * 64-bit Src0
3658 */
3659
3660 /* Return pointer to just after last instruction. */
3661 return instruction;
3662 }
3663
3664 /* Execution should never reach here; keep compiler happy. */
3665 return NULL;
3666 }
3667
pvr_pds_generate_compute_barrier_conditional(uint32_t * buffer,enum pvr_pds_generate_mode gen_mode)3668 uint32_t *pvr_pds_generate_compute_barrier_conditional(
3669 uint32_t *buffer,
3670 enum pvr_pds_generate_mode gen_mode)
3671 {
3672 /* Compute barriers supported. Need to test for coeff sync task. */
3673
3674 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3675 return buffer; /* No data segment. */
3676
3677 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3678 /* Test whether this is the coefficient update task or not. */
3679 *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3680 */
3681 PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3682 */
3683 PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC
3684 */
3685 1 /* ADDR */);
3686
3687 /* Encode a HALT. */
3688 *buffer++ = pvr_pds_inst_encode_halt(1);
3689
3690 /* Reset the default predicate to IF0. */
3691 *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3692 */
3693 PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3694 */
3695 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC
3696 */
3697 1 /* ADDR */);
3698 }
3699
3700 return buffer;
3701 }
3702
3703 /**
3704 * Generates program to kick the USC task to store shared.
3705 *
3706 * \param program Pointer to the PDS shared register.
3707 * \param buffer Pointer to the buffer for the program.
3708 * \param gen_mode Either code and data can be generated or sizes only updated.
3709 * \param dev_info PVR device information structure.
3710 * \returns Pointer to just beyond the buffer for the program.
3711 */
pvr_pds_generate_shared_storing_program(struct pvr_pds_shared_storing_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3712 uint32_t *pvr_pds_generate_shared_storing_program(
3713 struct pvr_pds_shared_storing_program *restrict program,
3714 uint32_t *restrict buffer,
3715 enum pvr_pds_generate_mode gen_mode,
3716 const struct pvr_device_info *dev_info)
3717 {
3718 struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3719 struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3720
3721 if (gen_mode == PDS_GENERATE_SIZES)
3722 return NULL;
3723
3724 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3725 uint32_t *constants = buffer;
3726
3727 constants =
3728 pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info);
3729 program->data_size = doutw_control->data_size;
3730
3731 constants = pvr_pds_kick_usc(kick_usc_program,
3732 constants,
3733 0,
3734 program->cc_enable,
3735 gen_mode);
3736 program->data_size += kick_usc_program->data_size;
3737
3738 return constants;
3739 }
3740
3741 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3742 /* Generate PDS code segment. */
3743 uint32_t *instruction = buffer;
3744
3745 /* doutw vi1, vi0
3746 * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
3747 * emit
3748 */
3749 instruction =
3750 pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info);
3751 program->code_size = doutw_control->code_size;
3752
3753 /* Offset into data segment follows on from doutw data segment. */
3754 instruction = pvr_pds_kick_usc(kick_usc_program,
3755 instruction,
3756 doutw_control->data_size,
3757 program->cc_enable,
3758 gen_mode);
3759 program->code_size += kick_usc_program->code_size;
3760
3761 return instruction;
3762 }
3763
3764 /* Execution should never reach here. */
3765 return NULL;
3766 }
3767
pvr_pds_generate_fence_terminate_program(struct pvr_pds_fence_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3768 uint32_t *pvr_pds_generate_fence_terminate_program(
3769 struct pvr_pds_fence_program *restrict program,
3770 uint32_t *restrict buffer,
3771 enum pvr_pds_generate_mode gen_mode,
3772 const struct pvr_device_info *dev_info)
3773 {
3774 uint32_t data_size = 0;
3775 uint32_t code_size = 0;
3776
3777 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3778 /* Data segment. */
3779 uint32_t *constants, *constants_base;
3780
3781 constants = constants_base = (uint32_t *)buffer;
3782
3783 /* DOUTC sources are not used, but they must be valid. */
3784 pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
3785 data_size += program->data_size;
3786
3787 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3788 /* Append a 64-bit constant with value 1. Used to increment ptemp.
3789 * Return the offset into the data segment.
3790 */
3791 program->fence_constant_word =
3792 pvr_pds_append_constant64(constants_base, 1, &data_size);
3793 }
3794
3795 program->data_size = data_size;
3796 return constants;
3797 }
3798
3799 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3800 /* Code segment. */
3801 uint32_t *instruction = (uint32_t *)buffer;
3802
3803 instruction = pvr_pds_generate_compute_barrier_conditional(
3804 instruction,
3805 PDS_GENERATE_CODE_SEGMENT);
3806 code_size += 3;
3807
3808 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3809 /* lock */
3810 *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
3811
3812 /* add64 pt[0], pt[0], #1 */
3813 *instruction++ = pvr_pds_inst_encode_add64(
3814 0, /* cc */
3815 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3816 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3817 PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
3818 */
3819 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3820 (program->fence_constant_word >> 1), /* src1 = 1 */
3821 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
3822 * ptemp[0]
3823 */
3824
3825 /* release */
3826 *instruction++ = pvr_pds_inst_encode_release(0); /* cc */
3827
3828 /* cmp pt[0] EQ 0x4 == Number of USC clusters per phantom */
3829 *instruction++ = pvr_pds_inst_encode_cmpi(
3830 0, /* cc */
3831 PVR_ROGUE_PDSINST_COP_EQ,
3832 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
3833 * = ptemp[0]
3834 */
3835 PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
3836
3837 /* bra -1 */
3838 *instruction++ =
3839 pvr_pds_encode_bra(0, /* cc */
3840 1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
3841 */
3842 0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
3843 */
3844 -1); /* bra PC */
3845 code_size += 5;
3846 }
3847
3848 /* DOUTC */
3849 instruction = pvr_pds_generate_doutc(program,
3850 instruction,
3851 PDS_GENERATE_CODE_SEGMENT);
3852 code_size += program->code_size;
3853
3854 program->code_size = code_size;
3855 return instruction;
3856 }
3857
3858 /* Execution should never reach here. */
3859 return NULL;
3860 }
3861
3862 /**
3863 * Generates program to kick the USC task to load shared registers from memory.
3864 *
3865 * \param program Pointer to the PDS shared register.
3866 * \param buffer Pointer to the buffer for the program.
3867 * \param gen_mode Either code and data can be generated or sizes only updated.
3868 * \param dev_info PVR device information struct.
3869 * \returns Pointer to just beyond the buffer for the program.
3870 */
pvr_pds_generate_compute_shared_loading_program(struct pvr_pds_shared_storing_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3871 uint32_t *pvr_pds_generate_compute_shared_loading_program(
3872 struct pvr_pds_shared_storing_program *restrict program,
3873 uint32_t *restrict buffer,
3874 enum pvr_pds_generate_mode gen_mode,
3875 const struct pvr_device_info *dev_info)
3876 {
3877 struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3878 struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3879
3880 uint32_t next_constant;
3881 uint32_t data_size = 0;
3882 uint32_t code_size = 0;
3883
3884 /* This needs to persist to the CODE_SEGMENT call. */
3885 static uint32_t fence_constant_word = 0;
3886 uint64_t zero_constant64 = 0;
3887
3888 if (gen_mode == PDS_GENERATE_SIZES)
3889 return NULL;
3890
3891 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3892 uint32_t *constants = buffer;
3893
3894 constants = pvr_pds_generate_doutw(doutw_control,
3895 constants,
3896 PDS_GENERATE_DATA_SEGMENT,
3897 dev_info);
3898 data_size += doutw_control->data_size;
3899
3900 constants = pvr_pds_kick_usc(kick_usc_program,
3901 constants,
3902 0,
3903 program->cc_enable,
3904 gen_mode);
3905 data_size += kick_usc_program->data_size;
3906
3907 /* Copy the fence constant value (64-bit). */
3908 next_constant = data_size; /* Assumes data words fully packed. */
3909 fence_constant_word =
3910 pvr_pds_get_constants(&next_constant, 2, &data_size);
3911
3912 /* Encode the fence constant src0 (offset measured from start of data
3913 * buffer). Fence barrier is initialized to zero.
3914 */
3915 pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
3916 /* Update the const size. */
3917 data_size += 2;
3918 constants += 2;
3919
3920 program->data_size = data_size;
3921 return constants;
3922 }
3923
3924 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3925 /* Generate PDS code segment. */
3926 uint32_t *instruction = buffer;
3927
3928 /* add64 pt0, c0, c0
3929 * IF [2x Phantoms]
3930 * add64 pt1, c0, c0
3931 * st [constant_mem_addr], pt0, 4
3932 * ENDIF
3933 * doutw vi1, vi0
3934 * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
3935 * emit
3936 *
3937 * Zero the persistent temp (SW fence for context switch).
3938 */
3939 *instruction++ = pvr_pds_inst_encode_add64(
3940 0, /* cc */
3941 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3942 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3943 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3944 (fence_constant_word >> 1), /* src0
3945 * = 0
3946 */
3947 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3948 (fence_constant_word >> 1), /* src1
3949 * = 0
3950 */
3951 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
3952 */
3953 code_size++;
3954
3955 instruction = pvr_pds_generate_doutw(doutw_control,
3956 instruction,
3957 PDS_GENERATE_CODE_SEGMENT,
3958 dev_info);
3959 code_size += doutw_control->code_size;
3960
3961 /* Offset into data segment follows on from doutw data segment. */
3962 instruction = pvr_pds_kick_usc(kick_usc_program,
3963 instruction,
3964 doutw_control->data_size,
3965 program->cc_enable,
3966 gen_mode);
3967 code_size += kick_usc_program->code_size;
3968
3969 program->code_size = code_size;
3970 return instruction;
3971 }
3972
3973 /* Execution should never reach here. */
3974 return NULL;
3975 }
3976
3977 /**
3978 * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
3979 * Relies on num_fpu_iterators being initialized for size calculation.
3980 * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
3981 * initialized for program generation.
3982 *
3983 * \param program Pointer to the PDS pixel shader program.
3984 * \param buffer Pointer to the buffer for the program.
3985 * \param gen_mode Either code and data can be generated or sizes only updated.
3986 * \returns Pointer to just beyond the buffer for the program.
3987 */
pvr_pds_coefficient_loading(struct pvr_pds_coeff_loading_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)3988 uint32_t *pvr_pds_coefficient_loading(
3989 struct pvr_pds_coeff_loading_program *restrict program,
3990 uint32_t *restrict buffer,
3991 enum pvr_pds_generate_mode gen_mode)
3992 {
3993 uint32_t constant;
3994 uint32_t *instruction;
3995 uint32_t total_data_size, code_size;
3996
3997 /* Place constants at the front of the buffer. */
3998 uint32_t *constants = buffer;
3999 /* Start counting constants from 0. */
4000 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4001
4002 /* Save the data segment pointer and size. */
4003 program->data_segment = constants;
4004
4005 total_data_size = 0;
4006 code_size = 0;
4007
4008 total_data_size += 2 * program->num_fpu_iterators;
4009 code_size += program->num_fpu_iterators;
4010
4011 /* Instructions start where constants finished, but we must take note of
4012 * alignment.
4013 *
4014 * 128-bit boundary = 4 dwords.
4015 */
4016 total_data_size = ALIGN_POT(total_data_size, 4);
4017 if (gen_mode != PDS_GENERATE_SIZES) {
4018 uint32_t data_size = 0;
4019 uint32_t iterator = 0;
4020
4021 instruction = buffer + total_data_size;
4022
4023 while (iterator < program->num_fpu_iterators) {
4024 uint64_t iterator_word;
4025
4026 /* Copy the USC task control words to constants. */
4027 constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
4028
4029 /* Write the first iterator. */
4030 iterator_word =
4031 (uint64_t)program->FPU_iterators[iterator]
4032 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
4033
4034 /* Write the destination. */
4035 iterator_word |=
4036 (uint64_t)program->destination[iterator++]
4037 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
4038
4039 /* If this is the last DOUTI word the "Last Issue" bit should be
4040 * set.
4041 */
4042 if (iterator >= program->num_fpu_iterators) {
4043 iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
4044 }
4045
4046 /* Write the word to the buffer. */
4047 pvr_pds_write_wide_constant(constants,
4048 constant,
4049 iterator_word); /* 64-bit
4050 Src0
4051 */
4052
4053 /* Write the DOUT instruction. */
4054 *instruction++ = pvr_pds_encode_douti(
4055 /* cc */ 0,
4056 /* END */ 0,
4057 /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
4058 }
4059
4060 /* Update the last DOUTI instruction to have the END flag set. */
4061 *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
4062 } else {
4063 instruction = NULL;
4064 }
4065
4066 /* Update the data size and code size. Minimum temp count is 1. */
4067 program->temps_used = 1;
4068 program->data_size = total_data_size;
4069 program->code_size = code_size;
4070
4071 return instruction;
4072 }
4073
4074 /**
4075 * Generate a single ld/st instruction. This can correspond to one or more
4076 * real ld/st instructions based on the value of count.
4077 *
4078 * \param ld true to generate load, false to generate store.
4079 * \param control Cache mode control.
4080 * \param temp_index Dest temp for load/source temp for store, in 32bits
4081 * register index.
4082 * \param address Source for load/dest for store in bytes.
4083 * \param count Number of dwords for load/store.
4084 * \param next_constant
4085 * \param total_data_size
4086 * \param total_code_size
4087 * \param buffer Pointer to the buffer for the program.
4088 * \param data_fence Issue data fence.
4089 * \param gen_mode Either code and data can be generated or sizes only updated.
4090 * \param dev_info PVR device information structure.
4091 * \returns Pointer to just beyond the buffer for the program.
4092 */
pvr_pds_generate_single_ldst_instruction(bool ld,const struct pvr_pds_ldst_control * control,uint32_t temp_index,uint64_t address,uint32_t count,uint32_t * next_constant,uint32_t * total_data_size,uint32_t * total_code_size,uint32_t * restrict buffer,bool data_fence,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4093 uint32_t *pvr_pds_generate_single_ldst_instruction(
4094 bool ld,
4095 const struct pvr_pds_ldst_control *control,
4096 uint32_t temp_index,
4097 uint64_t address,
4098 uint32_t count,
4099 uint32_t *next_constant,
4100 uint32_t *total_data_size,
4101 uint32_t *total_code_size,
4102 uint32_t *restrict buffer,
4103 bool data_fence,
4104 enum pvr_pds_generate_mode gen_mode,
4105 const struct pvr_device_info *dev_info)
4106 {
4107 /* A single ld/ST here does NOT actually correspond to a single ld/ST
4108 * instruction, but may needs multiple ld/ST instructions because each ld/ST
4109 * instruction can only ld/ST a restricted max number of dwords which may
4110 * less than count passed here.
4111 */
4112
4113 uint32_t num_inst;
4114 uint32_t constant;
4115
4116 if (ld) {
4117 /* ld must operate on 64bits unit, and it needs to load from and to 128
4118 * bits aligned. Apart from the last ld, all the other need to ld 2x(x =
4119 * 1, 2, ...) times 64bits unit.
4120 */
4121 uint32_t per_inst_count = 0;
4122 uint32_t last_inst_count;
4123
4124 assert((gen_mode == PDS_GENERATE_SIZES) ||
4125 (((count % 2) == 0) && ((address % 16) == 0) &&
4126 (temp_index % 2) == 0));
4127
4128 count >>= 1;
4129 temp_index >>= 1;
4130
4131 /* Found out how many ld instructions are needed and ld size for the all
4132 * possible ld instructions.
4133 */
4134 if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
4135 num_inst = 1;
4136 last_inst_count = count;
4137 } else {
4138 per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
4139 if ((per_inst_count % 2) != 0)
4140 per_inst_count -= 1;
4141
4142 num_inst = count / per_inst_count;
4143 last_inst_count = count - per_inst_count * num_inst;
4144 num_inst += 1;
4145 }
4146
4147 /* Generate all the instructions. */
4148 for (uint32_t i = 0; i < num_inst; i++) {
4149 if ((i == (num_inst - 1)) && (last_inst_count == 0))
4150 break;
4151
4152 /* A single load instruction. */
4153 constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4154
4155 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4156 uint64_t ld_src0 = 0;
4157
4158 ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
4159 << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
4160 ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
4161 : per_inst_count) &
4162 PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
4163 << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
4164 ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
4165 << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
4166
4167 if (!control) {
4168 ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
4169
4170 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
4171 ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
4172
4173 } else {
4174 ld_src0 |= control->cache_control_const;
4175 }
4176
4177 /* Write it to the constant. */
4178 pvr_pds_write_constant64(buffer,
4179 constant,
4180 (uint32_t)(ld_src0),
4181 (uint32_t)(ld_src0 >> 32));
4182
4183 /* Adjust value for next ld instruction. */
4184 temp_index += per_inst_count;
4185 address += (((uint64_t)(per_inst_count)) << 3);
4186 }
4187
4188 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4189 *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
4190
4191 if (data_fence)
4192 *buffer++ = pvr_pds_inst_encode_wdf(0);
4193 }
4194 }
4195 } else {
4196 /* ST needs source memory address to be 32bits aligned. */
4197 assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
4198
4199 /* Found out how many ST instructions are needed, each ST can only store
4200 * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
4201 */
4202 num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
4203 num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
4204
4205 /* Generate all the instructions. */
4206 for (uint32_t i = 0; i < num_inst; i++) {
4207 /* A single store instruction. */
4208 constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4209
4210 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4211 uint32_t per_inst_count =
4212 (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
4213 ? count
4214 : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
4215 uint64_t st_src0 = 0;
4216
4217 st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
4218 << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
4219 st_src0 |=
4220 (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
4221 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
4222 st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
4223 << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
4224
4225 if (!control) {
4226 st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
4227
4228 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
4229 st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
4230 }
4231
4232 } else {
4233 st_src0 |= control->cache_control_const;
4234 }
4235
4236 /* Write it to the constant. */
4237 pvr_pds_write_constant64(buffer,
4238 constant,
4239 (uint32_t)(st_src0),
4240 (uint32_t)(st_src0 >> 32));
4241
4242 /* Adjust value for next ST instruction. */
4243 temp_index += per_inst_count;
4244 count -= per_inst_count;
4245 address += (((uint64_t)(per_inst_count)) << 2);
4246 }
4247
4248 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4249 *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
4250
4251 if (data_fence)
4252 *buffer++ = pvr_pds_inst_encode_wdf(0);
4253 }
4254 }
4255 }
4256
4257 (*total_code_size) += num_inst;
4258 if (data_fence)
4259 (*total_code_size) += num_inst;
4260
4261 if (gen_mode != PDS_GENERATE_SIZES)
4262 return buffer;
4263 return NULL;
4264 }
4265
4266 /**
4267 * Generate programs used to prepare stream out, i.e., clear stream out buffer
4268 * overflow flags and update Persistent temps by a ld instruction.
4269 *
4270 * This must be used in PPP state update.
4271 *
4272 * \param program Pointer to the stream out program.
4273 * \param buffer Pointer to the buffer for the program.
4274 * \param store_mode If true then the data is stored to memory. If false then
4275 * the data is loaded from memory.
4276 * \param gen_mode Either code and data can be generated or sizes only updated.
4277 * \param dev_info PVR device information structure.
4278 * \returns Pointer to just beyond the buffer for the program.
4279 */
pvr_pds_generate_stream_out_init_program(struct pvr_pds_stream_out_init_program * restrict program,uint32_t * restrict buffer,bool store_mode,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4280 uint32_t *pvr_pds_generate_stream_out_init_program(
4281 struct pvr_pds_stream_out_init_program *restrict program,
4282 uint32_t *restrict buffer,
4283 bool store_mode,
4284 enum pvr_pds_generate_mode gen_mode,
4285 const struct pvr_device_info *dev_info)
4286 {
4287 uint32_t total_data_size = 0;
4288 uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
4289
4290 /* Start counting constants from 0. */
4291 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4292
4293 uint32_t total_code_size = 1;
4294
4295 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4296 /* We only need to clear global stream out predicate, other predicates
4297 * are not used during the stream out buffer overflow test.
4298 */
4299 *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
4300 }
4301
4302 for (uint32_t index = 0; index < program->num_buffers; index++) {
4303 if (program->dev_address_for_buffer_data[index] != 0) {
4304 /* Generate load/store program to load/store persistent temps. */
4305
4306 /* NOTE: store_mode == true case should be handled by
4307 * StreamOutTerminate.
4308 */
4309 buffer = pvr_pds_generate_single_ldst_instruction(
4310 !store_mode,
4311 NULL,
4312 PTDst,
4313 program->dev_address_for_buffer_data[index],
4314 program->pds_buffer_data_size[index],
4315 &next_constant,
4316 &total_data_size,
4317 &total_code_size,
4318 buffer,
4319 false,
4320 gen_mode,
4321 dev_info);
4322 }
4323
4324 PTDst += program->pds_buffer_data_size[index];
4325 }
4326
4327 total_code_size += 2;
4328
4329 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4330 /* We need to fence the loading. */
4331 *buffer++ = pvr_pds_inst_encode_wdf(0);
4332 *buffer++ = pvr_pds_inst_encode_halt(0);
4333 }
4334
4335 /* Save size information to program */
4336 program->stream_out_init_pds_data_size =
4337 ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4338 /* PDS program code size. */
4339 program->stream_out_init_pds_code_size = total_code_size;
4340
4341 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4342 return buffer + program->stream_out_init_pds_data_size;
4343 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4344 return buffer;
4345
4346 return NULL;
4347 }
4348
4349 /**
4350 * Generate stream out terminate program for stream out.
4351 *
4352 * If pds_persistent_temp_size_to_store is 0, the final primitive written value
4353 * will be stored.
4354 *
4355 * If pds_persistent_temp_size_to_store is non 0, the value of persistent temps
4356 * will be stored into memory.
4357 *
4358 * The stream out terminate program is used to update the PPP state and the data
4359 * and code section cannot be separate.
4360 *
4361 * \param program Pointer to the stream out program.
4362 * \param buffer Pointer to the buffer for the program.
4363 * \param gen_mode Either code and data can be generated or sizes only updated.
4364 * \param dev_info PVR device info structure.
4365 * \returns Pointer to just beyond the buffer for the program.
4366 */
pvr_pds_generate_stream_out_terminate_program(struct pvr_pds_stream_out_terminate_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4367 uint32_t *pvr_pds_generate_stream_out_terminate_program(
4368 struct pvr_pds_stream_out_terminate_program *restrict program,
4369 uint32_t *restrict buffer,
4370 enum pvr_pds_generate_mode gen_mode,
4371 const struct pvr_device_info *dev_info)
4372 {
4373 uint32_t next_constant;
4374 uint32_t total_data_size = 0, total_code_size = 0;
4375
4376 /* Start counting constants from 0. */
4377 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4378
4379 /* Generate store program to store persistent temps. */
4380 buffer = pvr_pds_generate_single_ldst_instruction(
4381 false,
4382 NULL,
4383 PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
4384 program->dev_address_for_storing_persistent_temp,
4385 program->pds_persistent_temp_size_to_store,
4386 &next_constant,
4387 &total_data_size,
4388 &total_code_size,
4389 buffer,
4390 false,
4391 gen_mode,
4392 dev_info);
4393
4394 total_code_size += 2;
4395 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4396 *buffer++ = pvr_pds_inst_encode_wdf(0);
4397 *buffer++ = pvr_pds_inst_encode_halt(0);
4398 }
4399
4400 /* Save size information to program. */
4401 program->stream_out_terminate_pds_data_size =
4402 ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4403 /* PDS program code size. */
4404 program->stream_out_terminate_pds_code_size = total_code_size;
4405
4406 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4407 return buffer + program->stream_out_terminate_pds_data_size;
4408 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4409 return buffer;
4410
4411 return NULL;
4412 }
4413
4414 /* DrawArrays works in several steps:
4415 *
4416 * 1) load data from draw_indirect buffer
4417 * 2) tweak data to match hardware formats
4418 * 3) write data to indexblock
4419 * 4) signal the VDM to continue
4420 *
4421 * This is complicated by HW limitations on alignment, as well as a HWBRN.
4422 *
4423 * 1) Load data.
4424 * Loads _must_ be 128-bit aligned. Because there is no such limitation in the
4425 * spec we must deal with this by choosing an appropriate earlier address and
4426 * loading enough dwords that we load the entirety of the buffer.
4427 *
4428 * if addr & 0xf:
4429 * load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
4430 * data = tmp[0 + (uiAddr & 0xf) >> 2]...
4431 * else
4432 * load [addr] 4 dwords -> tmp[0, 1, 2, 3]
4433 * data = tmp[0]...
4434 *
4435 *
4436 * 2) Tweak data.
4437 * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
4438 * the VDM control stream. We must subtract 1 from the loaded primCount.
4439 *
4440 * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
4441 * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
4442 * into another tmp that has the correct alignment. Note: this is only required
4443 * when data = tmp[even], as primCount is data+1:
4444 *
4445 * if data = tmp[even]:
4446 * primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
4447 * else:
4448 * primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
4449 *
4450 * This boils down to:
4451 *
4452 * primCount = data[1]
4453 * primCountSrc = data[1]
4454 * if brn_present && (data is even):
4455 * mov scratch, primCount
4456 * primCountSrc = scratch
4457 * endif
4458 * sub primCount, primCountSrc, 1
4459 *
4460 * 3) Store Data.
4461 * Write the now-tweaked data over the top of the indexblock.
4462 * To ensure the write completes before the VDM re-reads the data, we must cause
4463 * a data hazard by doing a dummy (dummy meaning we don't care about the
4464 * returned data) load from the same addresses. Again, because the ld must
4465 * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
4466 * index block is 128-bit aligned. This is the client driver's responsibility.
4467 *
4468 * st data[0, 1, 2] -> (idxblock + 4)
4469 * load [idxblock] 4 dwords
4470 *
4471 * 4) Signal the VDM
4472 * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
4473 * where it is currently fenced on a dummy idxblock that has been inserted by
4474 * the driver.
4475 */
4476
4477 #include "pvr_draw_indirect_arrays0.h"
4478 #include "pvr_draw_indirect_arrays1.h"
4479 #include "pvr_draw_indirect_arrays2.h"
4480 #include "pvr_draw_indirect_arrays3.h"
4481
4482 #include "pvr_draw_indirect_arrays_base_instance0.h"
4483 #include "pvr_draw_indirect_arrays_base_instance1.h"
4484 #include "pvr_draw_indirect_arrays_base_instance2.h"
4485 #include "pvr_draw_indirect_arrays_base_instance3.h"
4486
4487 #include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
4488 #include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
4489 #include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
4490 #include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
4491
/* Selects the SLC mode bits for a ld src0 word: the cached mode when the
 * device reports the slc_mcu_cache_controls feature, the bypass mode
 * otherwise.
 */
#define ENABLE_SLC_MCU_CACHE_CONTROLS(device)         \
   (!(device)->features.has_slc_mcu_cache_controls    \
       ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS  \
       : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED)
4496
pvr_pds_generate_draw_arrays_indirect(struct pvr_pds_drawindirect_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4497 void pvr_pds_generate_draw_arrays_indirect(
4498 struct pvr_pds_drawindirect_program *restrict program,
4499 uint32_t *restrict buffer,
4500 enum pvr_pds_generate_mode gen_mode,
4501 const struct pvr_device_info *dev_info)
4502 {
4503 if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4504 (gen_mode == PDS_GENERATE_SIZES)) {
4505 const struct pvr_psc_program_output *psc_program = NULL;
4506 switch ((program->arg_buffer >> 2) % 4) {
4507 case 0:
4508 if (program->support_base_instance) {
4509 if (program->increment_draw_id) {
4510 psc_program =
4511 &pvr_draw_indirect_arrays_base_instance_drawid0_program;
4512 } else {
4513 psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
4514 }
4515 } else {
4516 psc_program = &pvr_draw_indirect_arrays0_program;
4517 }
4518 break;
4519 case 1:
4520 if (program->support_base_instance) {
4521 if (program->increment_draw_id) {
4522 psc_program =
4523 &pvr_draw_indirect_arrays_base_instance_drawid1_program;
4524 } else {
4525 psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
4526 }
4527 } else {
4528 psc_program = &pvr_draw_indirect_arrays1_program;
4529 }
4530 break;
4531 case 2:
4532 if (program->support_base_instance) {
4533 if (program->increment_draw_id) {
4534 psc_program =
4535 &pvr_draw_indirect_arrays_base_instance_drawid2_program;
4536 } else {
4537 psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
4538 }
4539 } else {
4540 psc_program = &pvr_draw_indirect_arrays2_program;
4541 }
4542 break;
4543 case 3:
4544 if (program->support_base_instance) {
4545 if (program->increment_draw_id) {
4546 psc_program =
4547 &pvr_draw_indirect_arrays_base_instance_drawid3_program;
4548 } else {
4549 psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
4550 }
4551 } else {
4552 psc_program = &pvr_draw_indirect_arrays3_program;
4553 }
4554 break;
4555 }
4556
4557 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4558 memcpy(buffer,
4559 psc_program->code,
4560 psc_program->code_size * sizeof(uint32_t));
4561 #if defined(DUMP_PDS)
4562 for (uint32_t i = 0; i < psc_program->code_size; i++)
4563 PVR_PDS_PRINT_INST(buffer[i]);
4564 #endif
4565 }
4566
4567 program->program = *psc_program;
4568 } else {
4569 switch ((program->arg_buffer >> 2) % 4) {
4570 case 0:
4571 if (program->support_base_instance) {
4572 if (program->increment_draw_id) {
4573 pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
4574 buffer,
4575 program->arg_buffer & ~0xfull,
4576 dev_info);
4577 pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
4578 buffer,
4579 program->index_list_addr_buffer + 4);
4580 pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
4581 buffer,
4582 program->index_list_addr_buffer);
4583 pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
4584 buffer,
4585 program->num_views);
4586 pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
4587 buffer);
4588 } else {
4589 pvr_write_draw_indirect_arrays_base_instance0_di_data(
4590 buffer,
4591 program->arg_buffer & ~0xfull,
4592 dev_info);
4593 pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
4594 buffer,
4595 program->index_list_addr_buffer + 4);
4596 pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
4597 buffer,
4598 program->index_list_addr_buffer);
4599 pvr_write_draw_indirect_arrays_base_instance0_num_views(
4600 buffer,
4601 program->num_views);
4602 pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
4603 }
4604 } else {
4605 pvr_write_draw_indirect_arrays0_di_data(buffer,
4606 program->arg_buffer &
4607 ~0xfull,
4608 dev_info);
4609 pvr_write_draw_indirect_arrays0_write_vdm(
4610 buffer,
4611 program->index_list_addr_buffer + 4);
4612 pvr_write_draw_indirect_arrays0_flush_vdm(
4613 buffer,
4614 program->index_list_addr_buffer);
4615 pvr_write_draw_indirect_arrays0_num_views(buffer,
4616 program->num_views);
4617 pvr_write_draw_indirect_arrays0_immediates(buffer);
4618 }
4619 break;
4620 case 1:
4621 if (program->support_base_instance) {
4622 if (program->increment_draw_id) {
4623 pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
4624 buffer,
4625 program->arg_buffer & ~0xfull,
4626 dev_info);
4627 pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
4628 buffer,
4629 program->index_list_addr_buffer + 4);
4630 pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
4631 buffer,
4632 program->index_list_addr_buffer);
4633 pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
4634 buffer,
4635 program->num_views);
4636 pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
4637 buffer);
4638 } else {
4639 pvr_write_draw_indirect_arrays_base_instance1_di_data(
4640 buffer,
4641 program->arg_buffer & ~0xfull,
4642 dev_info);
4643 pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
4644 buffer,
4645 program->index_list_addr_buffer + 4);
4646 pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
4647 buffer,
4648 program->index_list_addr_buffer);
4649 pvr_write_draw_indirect_arrays_base_instance1_num_views(
4650 buffer,
4651 program->num_views);
4652 pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
4653 }
4654 } else {
4655 pvr_write_draw_indirect_arrays1_di_data(buffer,
4656 program->arg_buffer &
4657 ~0xfull,
4658 dev_info);
4659 pvr_write_draw_indirect_arrays1_write_vdm(
4660 buffer,
4661 program->index_list_addr_buffer + 4);
4662 pvr_write_draw_indirect_arrays1_flush_vdm(
4663 buffer,
4664 program->index_list_addr_buffer);
4665 pvr_write_draw_indirect_arrays1_num_views(buffer,
4666 program->num_views);
4667 pvr_write_draw_indirect_arrays1_immediates(buffer);
4668 }
4669 break;
4670 case 2:
4671 if (program->support_base_instance) {
4672 if (program->increment_draw_id) {
4673 pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
4674 buffer,
4675 program->arg_buffer & ~0xfull,
4676 dev_info);
4677 pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
4678 buffer,
4679 program->index_list_addr_buffer + 4);
4680 pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
4681 buffer,
4682 program->index_list_addr_buffer);
4683 pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
4684 buffer,
4685 program->num_views);
4686 pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
4687 buffer);
4688 } else {
4689 pvr_write_draw_indirect_arrays_base_instance2_di_data(
4690 buffer,
4691 program->arg_buffer & ~0xfull,
4692 dev_info);
4693 pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
4694 buffer,
4695 program->index_list_addr_buffer + 4);
4696 pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
4697 buffer,
4698 program->index_list_addr_buffer);
4699 pvr_write_draw_indirect_arrays_base_instance2_num_views(
4700 buffer,
4701 program->num_views);
4702 pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
4703 }
4704 } else {
4705 pvr_write_draw_indirect_arrays2_di_data(buffer,
4706 program->arg_buffer &
4707 ~0xfull,
4708 dev_info);
4709 pvr_write_draw_indirect_arrays2_write_vdm(
4710 buffer,
4711 program->index_list_addr_buffer + 4);
4712 pvr_write_draw_indirect_arrays2_flush_vdm(
4713 buffer,
4714 program->index_list_addr_buffer);
4715 pvr_write_draw_indirect_arrays2_num_views(buffer,
4716 program->num_views);
4717 pvr_write_draw_indirect_arrays2_immediates(buffer);
4718 }
4719 break;
4720 case 3:
4721 if (program->support_base_instance) {
4722 if (program->increment_draw_id) {
4723 pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
4724 buffer,
4725 program->arg_buffer & ~0xfull,
4726 dev_info);
4727 pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
4728 buffer,
4729 program->index_list_addr_buffer + 4);
4730 pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
4731 buffer,
4732 program->index_list_addr_buffer);
4733 pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
4734 buffer,
4735 program->num_views);
4736 pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
4737 buffer);
4738 } else {
4739 pvr_write_draw_indirect_arrays_base_instance3_di_data(
4740 buffer,
4741 program->arg_buffer & ~0xfull,
4742 dev_info);
4743 pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
4744 buffer,
4745 program->index_list_addr_buffer + 4);
4746 pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
4747 buffer,
4748 program->index_list_addr_buffer);
4749 pvr_write_draw_indirect_arrays_base_instance3_num_views(
4750 buffer,
4751 program->num_views);
4752 pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
4753 }
4754 } else {
4755 pvr_write_draw_indirect_arrays3_di_data(buffer,
4756 program->arg_buffer &
4757 ~0xfull,
4758 dev_info);
4759 pvr_write_draw_indirect_arrays3_write_vdm(
4760 buffer,
4761 program->index_list_addr_buffer + 4);
4762 pvr_write_draw_indirect_arrays3_flush_vdm(
4763 buffer,
4764 program->index_list_addr_buffer);
4765 pvr_write_draw_indirect_arrays3_num_views(buffer,
4766 program->num_views);
4767 pvr_write_draw_indirect_arrays3_immediates(buffer);
4768 }
4769 break;
4770 }
4771 }
4772 }
4773
4774 #include "pvr_draw_indirect_elements0.h"
4775 #include "pvr_draw_indirect_elements1.h"
4776 #include "pvr_draw_indirect_elements2.h"
4777 #include "pvr_draw_indirect_elements3.h"
4778 #include "pvr_draw_indirect_elements_base_instance0.h"
4779 #include "pvr_draw_indirect_elements_base_instance1.h"
4780 #include "pvr_draw_indirect_elements_base_instance2.h"
4781 #include "pvr_draw_indirect_elements_base_instance3.h"
4782 #include "pvr_draw_indirect_elements_base_instance_drawid0.h"
4783 #include "pvr_draw_indirect_elements_base_instance_drawid1.h"
4784 #include "pvr_draw_indirect_elements_base_instance_drawid2.h"
4785 #include "pvr_draw_indirect_elements_base_instance_drawid3.h"
4786
pvr_pds_generate_draw_elements_indirect(struct pvr_pds_drawindirect_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4787 void pvr_pds_generate_draw_elements_indirect(
4788 struct pvr_pds_drawindirect_program *restrict program,
4789 uint32_t *restrict buffer,
4790 enum pvr_pds_generate_mode gen_mode,
4791 const struct pvr_device_info *dev_info)
4792 {
4793 if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4794 (gen_mode == PDS_GENERATE_SIZES)) {
4795 const struct pvr_psc_program_output *psc_program = NULL;
4796 switch ((program->arg_buffer >> 2) % 4) {
4797 case 0:
4798 if (program->support_base_instance) {
4799 if (program->increment_draw_id) {
4800 psc_program =
4801 &pvr_draw_indirect_elements_base_instance_drawid0_program;
4802 } else {
4803 psc_program = &pvr_draw_indirect_elements_base_instance0_program;
4804 }
4805 } else {
4806 psc_program = &pvr_draw_indirect_elements0_program;
4807 }
4808 break;
4809 case 1:
4810 if (program->support_base_instance) {
4811 if (program->increment_draw_id) {
4812 psc_program =
4813 &pvr_draw_indirect_elements_base_instance_drawid1_program;
4814 } else {
4815 psc_program = &pvr_draw_indirect_elements_base_instance1_program;
4816 }
4817 } else {
4818 psc_program = &pvr_draw_indirect_elements1_program;
4819 }
4820 break;
4821 case 2:
4822 if (program->support_base_instance) {
4823 if (program->increment_draw_id) {
4824 psc_program =
4825 &pvr_draw_indirect_elements_base_instance_drawid2_program;
4826 } else {
4827 psc_program = &pvr_draw_indirect_elements_base_instance2_program;
4828 }
4829 } else {
4830 psc_program = &pvr_draw_indirect_elements2_program;
4831 }
4832 break;
4833 case 3:
4834 if (program->support_base_instance) {
4835 if (program->increment_draw_id) {
4836 psc_program =
4837 &pvr_draw_indirect_elements_base_instance_drawid3_program;
4838 } else {
4839 psc_program = &pvr_draw_indirect_elements_base_instance3_program;
4840 }
4841 } else {
4842 psc_program = &pvr_draw_indirect_elements3_program;
4843 }
4844 break;
4845 }
4846
4847 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4848 memcpy(buffer,
4849 psc_program->code,
4850 psc_program->code_size * sizeof(uint32_t));
4851
4852 #if defined(DUMP_PDS)
4853 for (uint32_t i = 0; i < psc_program->code_size; i++)
4854 PVR_PDS_PRINT_INST(buffer[i]);
4855 #endif
4856 }
4857
4858 program->program = *psc_program;
4859 } else {
4860 switch ((program->arg_buffer >> 2) % 4) {
4861 case 0:
4862 if (program->support_base_instance) {
4863 if (program->increment_draw_id) {
4864 pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
4865 buffer,
4866 program->arg_buffer & ~0xfull,
4867 dev_info);
4868 pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
4869 buffer,
4870 program->index_list_addr_buffer);
4871 pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
4872 buffer,
4873 program->index_list_addr_buffer);
4874 pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
4875 buffer,
4876 program->num_views);
4877 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
4878 buffer,
4879 program->index_stride);
4880 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
4881 buffer,
4882 program->index_buffer);
4883 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
4884 buffer,
4885 program->index_block_header);
4886 pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
4887 buffer);
4888 } else {
4889 pvr_write_draw_indirect_elements_base_instance0_di_data(
4890 buffer,
4891 program->arg_buffer & ~0xfull,
4892 dev_info);
4893 pvr_write_draw_indirect_elements_base_instance0_write_vdm(
4894 buffer,
4895 program->index_list_addr_buffer);
4896 pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
4897 buffer,
4898 program->index_list_addr_buffer);
4899 pvr_write_draw_indirect_elements_base_instance0_num_views(
4900 buffer,
4901 program->num_views);
4902 pvr_write_draw_indirect_elements_base_instance0_idx_stride(
4903 buffer,
4904 program->index_stride);
4905 pvr_write_draw_indirect_elements_base_instance0_idx_base(
4906 buffer,
4907 program->index_buffer);
4908 pvr_write_draw_indirect_elements_base_instance0_idx_header(
4909 buffer,
4910 program->index_block_header);
4911 pvr_write_draw_indirect_elements_base_instance0_immediates(
4912 buffer);
4913 }
4914 } else {
4915 pvr_write_draw_indirect_elements0_di_data(buffer,
4916 program->arg_buffer &
4917 ~0xfull,
4918 dev_info);
4919 pvr_write_draw_indirect_elements0_write_vdm(
4920 buffer,
4921 program->index_list_addr_buffer);
4922 pvr_write_draw_indirect_elements0_flush_vdm(
4923 buffer,
4924 program->index_list_addr_buffer);
4925 pvr_write_draw_indirect_elements0_num_views(buffer,
4926 program->num_views);
4927 pvr_write_draw_indirect_elements0_idx_stride(buffer,
4928 program->index_stride);
4929 pvr_write_draw_indirect_elements0_idx_base(buffer,
4930 program->index_buffer);
4931 pvr_write_draw_indirect_elements0_idx_header(
4932 buffer,
4933 program->index_block_header);
4934 pvr_write_draw_indirect_elements0_immediates(buffer);
4935 }
4936 break;
4937 case 1:
4938 if (program->support_base_instance) {
4939 if (program->increment_draw_id) {
4940 pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
4941 buffer,
4942 program->arg_buffer & ~0xfull,
4943 dev_info);
4944 pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
4945 buffer,
4946 program->index_list_addr_buffer);
4947 pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
4948 buffer,
4949 program->index_list_addr_buffer);
4950 pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
4951 buffer,
4952 program->num_views);
4953 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
4954 buffer,
4955 program->index_stride);
4956 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
4957 buffer,
4958 program->index_buffer);
4959 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
4960 buffer,
4961 program->index_block_header);
4962 pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
4963 buffer);
4964 } else {
4965 pvr_write_draw_indirect_elements_base_instance1_di_data(
4966 buffer,
4967 program->arg_buffer & ~0xfull,
4968 dev_info);
4969 pvr_write_draw_indirect_elements_base_instance1_write_vdm(
4970 buffer,
4971 program->index_list_addr_buffer);
4972 pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
4973 buffer,
4974 program->index_list_addr_buffer);
4975 pvr_write_draw_indirect_elements_base_instance1_num_views(
4976 buffer,
4977 program->num_views);
4978 pvr_write_draw_indirect_elements_base_instance1_idx_stride(
4979 buffer,
4980 program->index_stride);
4981 pvr_write_draw_indirect_elements_base_instance1_idx_base(
4982 buffer,
4983 program->index_buffer);
4984 pvr_write_draw_indirect_elements_base_instance1_idx_header(
4985 buffer,
4986 program->index_block_header);
4987 pvr_write_draw_indirect_elements_base_instance1_immediates(
4988 buffer);
4989 }
4990 } else {
4991 pvr_write_draw_indirect_elements1_di_data(buffer,
4992 program->arg_buffer &
4993 ~0xfull,
4994 dev_info);
4995 pvr_write_draw_indirect_elements1_write_vdm(
4996 buffer,
4997 program->index_list_addr_buffer);
4998 pvr_write_draw_indirect_elements1_flush_vdm(
4999 buffer,
5000 program->index_list_addr_buffer);
5001 pvr_write_draw_indirect_elements1_num_views(buffer,
5002 program->num_views);
5003 pvr_write_draw_indirect_elements1_idx_stride(buffer,
5004 program->index_stride);
5005 pvr_write_draw_indirect_elements1_idx_base(buffer,
5006 program->index_buffer);
5007 pvr_write_draw_indirect_elements1_idx_header(
5008 buffer,
5009 program->index_block_header);
5010 pvr_write_draw_indirect_elements1_immediates(buffer);
5011 }
5012 break;
5013 case 2:
5014 if (program->support_base_instance) {
5015 if (program->increment_draw_id) {
5016 pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
5017 buffer,
5018 program->arg_buffer & ~0xfull,
5019 dev_info);
5020 pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
5021 buffer,
5022 program->index_list_addr_buffer);
5023 pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
5024 buffer,
5025 program->index_list_addr_buffer);
5026 pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
5027 buffer,
5028 program->num_views);
5029 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
5030 buffer,
5031 program->index_stride);
5032 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
5033 buffer,
5034 program->index_buffer);
5035 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
5036 buffer,
5037 program->index_block_header);
5038 pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
5039 buffer);
5040 } else {
5041 pvr_write_draw_indirect_elements_base_instance2_di_data(
5042 buffer,
5043 program->arg_buffer & ~0xfull,
5044 dev_info);
5045 pvr_write_draw_indirect_elements_base_instance2_write_vdm(
5046 buffer,
5047 program->index_list_addr_buffer);
5048 pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
5049 buffer,
5050 program->index_list_addr_buffer);
5051 pvr_write_draw_indirect_elements_base_instance2_num_views(
5052 buffer,
5053 program->num_views);
5054 pvr_write_draw_indirect_elements_base_instance2_idx_stride(
5055 buffer,
5056 program->index_stride);
5057 pvr_write_draw_indirect_elements_base_instance2_idx_base(
5058 buffer,
5059 program->index_buffer);
5060 pvr_write_draw_indirect_elements_base_instance2_idx_header(
5061 buffer,
5062 program->index_block_header);
5063 pvr_write_draw_indirect_elements_base_instance2_immediates(
5064 buffer);
5065 }
5066 } else {
5067 pvr_write_draw_indirect_elements2_di_data(buffer,
5068 program->arg_buffer &
5069 ~0xfull,
5070 dev_info);
5071 pvr_write_draw_indirect_elements2_write_vdm(
5072 buffer,
5073 program->index_list_addr_buffer);
5074 pvr_write_draw_indirect_elements2_flush_vdm(
5075 buffer,
5076 program->index_list_addr_buffer);
5077 pvr_write_draw_indirect_elements2_num_views(buffer,
5078 program->num_views);
5079 pvr_write_draw_indirect_elements2_idx_stride(buffer,
5080 program->index_stride);
5081 pvr_write_draw_indirect_elements2_idx_base(buffer,
5082 program->index_buffer);
5083 pvr_write_draw_indirect_elements2_idx_header(
5084 buffer,
5085 program->index_block_header);
5086 pvr_write_draw_indirect_elements2_immediates(buffer);
5087 }
5088 break;
5089 case 3:
5090 if (program->support_base_instance) {
5091 if (program->increment_draw_id) {
5092 pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
5093 buffer,
5094 program->arg_buffer & ~0xfull,
5095 dev_info);
5096 pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
5097 buffer,
5098 program->index_list_addr_buffer);
5099 pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
5100 buffer,
5101 program->index_list_addr_buffer);
5102 pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
5103 buffer,
5104 program->num_views);
5105 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
5106 buffer,
5107 program->index_stride);
5108 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
5109 buffer,
5110 program->index_buffer);
5111 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
5112 buffer,
5113 program->index_block_header);
5114 pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
5115 buffer);
5116 } else {
5117 pvr_write_draw_indirect_elements_base_instance3_di_data(
5118 buffer,
5119 program->arg_buffer & ~0xfull,
5120 dev_info);
5121 pvr_write_draw_indirect_elements_base_instance3_write_vdm(
5122 buffer,
5123 program->index_list_addr_buffer);
5124 pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
5125 buffer,
5126 program->index_list_addr_buffer);
5127 pvr_write_draw_indirect_elements_base_instance3_num_views(
5128 buffer,
5129 program->num_views);
5130 pvr_write_draw_indirect_elements_base_instance3_idx_stride(
5131 buffer,
5132 program->index_stride);
5133 pvr_write_draw_indirect_elements_base_instance3_idx_base(
5134 buffer,
5135 program->index_buffer);
5136 pvr_write_draw_indirect_elements_base_instance3_idx_header(
5137 buffer,
5138 program->index_block_header);
5139 pvr_write_draw_indirect_elements_base_instance3_immediates(
5140 buffer);
5141 }
5142 } else {
5143 pvr_write_draw_indirect_elements3_di_data(buffer,
5144 program->arg_buffer &
5145 ~0xfull,
5146 dev_info);
5147 pvr_write_draw_indirect_elements3_write_vdm(
5148 buffer,
5149 program->index_list_addr_buffer);
5150 pvr_write_draw_indirect_elements3_flush_vdm(
5151 buffer,
5152 program->index_list_addr_buffer);
5153 pvr_write_draw_indirect_elements3_num_views(buffer,
5154 program->num_views);
5155 pvr_write_draw_indirect_elements3_idx_stride(buffer,
5156 program->index_stride);
5157 pvr_write_draw_indirect_elements3_idx_base(buffer,
5158 program->index_buffer);
5159 pvr_write_draw_indirect_elements3_idx_header(
5160 buffer,
5161 program->index_block_header);
5162 pvr_write_draw_indirect_elements3_immediates(buffer);
5163 }
5164 break;
5165 }
5166 }
5167 }
5168