1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stdint.h>
27 #include <stdio.h>
28 #include <string.h>
29
30 #include "pvr_device_info.h"
31 #include "pvr_pds.h"
32 #include "pvr_rogue_pds_defs.h"
33 #include "pvr_rogue_pds_disasm.h"
34 #include "pvr_rogue_pds_encode.h"
35 #include "util/log.h"
36 #include "util/macros.h"
37
/* Extract the upper (H32) and lower (L32) 32 bits of a 64-bit value. The whole
 * expansion is parenthesized so the cast cannot bind unexpectedly when the
 * macro is used inside a larger expression (e.g. as a sizeof operand).
 */
#define H32(X) ((uint32_t)((((X) >> 32U) & 0xFFFFFFFFUL)))
#define L32(X) ((uint32_t)(((X)&0xFFFFFFFFUL)))
40
41 /*****************************************************************************
42 Macro definitions
43 *****************************************************************************/
44
#define PVR_PDS_DWORD_SHIFT 2

/* Base index and size of the constants block and of the temps block. The
 * allocators in this file assert that constant allocations end at or before
 * PVR_PDS_CONSTANTS_BLOCK_SIZE and that temp allocations end at or before
 * PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_TEMPS_BLOCK_SIZE.
 */
#define PVR_PDS_CONSTANTS_BLOCK_BASE 0
#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
#define PVR_PDS_TEMPS_BLOCK_BASE 128
#define PVR_PDS_TEMPS_BLOCK_SIZE 32

/* The maximum ST/LD sizes are aliases of the corresponding count field masks.
 */
#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK

/* Map PDS temp registers to the CDM values they contain. Work-group IDs are
 * only available in the coefficient sync task.
 */
#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
/* Local IDs are available in every task. */
#define PVR_PDS_CDM_LOCAL_ID_X 0
#define PVR_PDS_CDM_LOCAL_ID_YZ 1

/* DOUTW dword mask selectors. The values are used as indices into
 * dword_mask_const[]; PVR_PDS_DOUTW_MAXMASK is the number of selectors and the
 * size of that table.
 */
#define PVR_PDS_DOUTW_LOWER32 0x0
#define PVR_PDS_DOUTW_UPPER32 0x1
#define PVR_PDS_DOUTW_LOWER64 0x2
#define PVR_PDS_DOUTW_LOWER128 0x3
#define PVR_PDS_DOUTW_MAXMASK 0x4

#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U)
73
74 /*****************************************************************************
75 Static variables
76 *****************************************************************************/
77
/* DOUTW SRC1 BSIZE field value for each PVR_PDS_DOUTW_* selector, indexed by
 * the selector value (LOWER32, UPPER32, LOWER64, LOWER128). Note that both the
 * LOWER64 and the LOWER128 entries hold the ALL64 encoding.
 */
static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
};

/* DOUTW SRC1 cache mode bits. The first index is 0 when the device has the
 * slc_mcu_cache_controls feature and 1 otherwise; the second index is 1 for a
 * cached write and 0 for a bypassing one. Without the feature no cache mode
 * bits are set.
 */
static const uint32_t cache_control_const[2][2] = {
   { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
   { 0, 0 }
};
93
94 /*****************************************************************************
95 Function definitions
96 *****************************************************************************/
97
pvr_pds_encode_ld_src0(uint64_t dest,uint64_t count8,uint64_t src_add,bool cached,const struct pvr_device_info * dev_info)98 uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
99 uint64_t count8,
100 uint64_t src_add,
101 bool cached,
102 const struct pvr_device_info *dev_info)
103 {
104 uint64_t encoded = 0;
105
106 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
107 encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
108 : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
109 }
110
111 encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
112 << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
113 encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
114 << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
115 encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
116 : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
117 encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
118 << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
119
120 return encoded;
121 }
122
pvr_pds_encode_st_src0(uint64_t src,uint64_t count4,uint64_t dst_add,bool write_through,const struct pvr_device_info * device_info)123 uint64_t pvr_pds_encode_st_src0(uint64_t src,
124 uint64_t count4,
125 uint64_t dst_add,
126 bool write_through,
127 const struct pvr_device_info *device_info)
128 {
129 uint64_t encoded = 0;
130
131 if (device_info->features.has_slc_mcu_cache_controls) {
132 encoded |= (write_through
133 ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
134 : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
135 }
136
137 encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
138 << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
139 encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
140 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
141 encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
142 : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
143 encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
144 << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
145
146 return encoded;
147 }
148
149 static ALWAYS_INLINE uint32_t
pvr_pds_encode_doutw_src1(uint32_t dest,uint32_t dword_mask,uint32_t flags,bool cached,const struct pvr_device_info * dev_info)150 pvr_pds_encode_doutw_src1(uint32_t dest,
151 uint32_t dword_mask,
152 uint32_t flags,
153 bool cached,
154 const struct pvr_device_info *dev_info)
155 {
156 assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
157 ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
158 (dword_mask < PVR_PDS_DOUTW_LOWER64));
159
160 uint32_t encoded =
161 (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
162
163 encoded |= dword_mask_const[dword_mask];
164
165 encoded |= flags;
166
167 encoded |=
168 cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
169 : 1]
170 [cached ? 1 : 0];
171 return encoded;
172 }
173
pvr_pds_encode_doutw64(uint32_t cc,uint32_t end,uint32_t src1,uint32_t src0)174 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
175 uint32_t end,
176 uint32_t src1,
177 uint32_t src0)
178 {
179 return pvr_pds_inst_encode_dout(cc,
180 end,
181 src1,
182 src0,
183 PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
184 }
185
pvr_pds_encode_doutu(uint32_t cc,uint32_t end,uint32_t src0)186 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
187 uint32_t end,
188 uint32_t src0)
189 {
190 return pvr_pds_inst_encode_dout(cc,
191 end,
192 0,
193 src0,
194 PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
195 }
196
pvr_pds_inst_encode_doutc(uint32_t cc,uint32_t end)197 static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
198 uint32_t end)
199 {
200 return pvr_pds_inst_encode_dout(cc,
201 end,
202 0,
203 0,
204 PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
205 }
206
pvr_pds_encode_doutd(uint32_t cc,uint32_t end,uint32_t src1,uint32_t src0)207 static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
208 uint32_t end,
209 uint32_t src1,
210 uint32_t src0)
211 {
212 return pvr_pds_inst_encode_dout(cc,
213 end,
214 src1,
215 src0,
216 PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
217 }
218
pvr_pds_encode_douti(uint32_t cc,uint32_t end,uint32_t src0)219 static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
220 uint32_t end,
221 uint32_t src0)
222 {
223 return pvr_pds_inst_encode_dout(cc,
224 end,
225 0,
226 src0,
227 PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
228 }
229
pvr_pds_encode_bra(uint32_t srcc,uint32_t neg,uint32_t setc,int32_t relative_address)230 static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
231 uint32_t neg,
232 uint32_t setc,
233 int32_t relative_address)
234 {
235 /* Address should be signed but API only allows unsigned value. */
236 return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
237 }
238
239 /**
240 * Gets the next constant address and moves the next constant pointer along.
241 *
242 * \param next_constant Pointer to the next constant address.
243 * \param num_constants The number of constants required.
244 * \param count The number of constants allocated.
245 * \return The address of the next constant.
246 */
pvr_pds_get_constants(uint32_t * next_constant,uint32_t num_constants,uint32_t * count)247 static uint32_t pvr_pds_get_constants(uint32_t *next_constant,
248 uint32_t num_constants,
249 uint32_t *count)
250 {
251 uint32_t constant;
252
253 /* Work out starting constant number. For even number of constants, start on
254 * a 64-bit boundary.
255 */
256 if (num_constants & 1)
257 constant = *next_constant;
258 else
259 constant = (*next_constant + 1) & ~1;
260
261 /* Update the count with the number of constants actually allocated. */
262 *count += constant + num_constants - *next_constant;
263
264 /* Move the next constant pointer. */
265 *next_constant = constant + num_constants;
266
267 assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
268
269 return constant;
270 }
271
272 /**
273 * Gets the next temp address and moves the next temp pointer along.
274 *
275 * \param next_temp Pointer to the next temp address.
276 * \param num_temps The number of temps required.
277 * \param count The number of temps allocated.
278 * \return The address of the next temp.
279 */
280 static uint32_t
pvr_pds_get_temps(uint32_t * next_temp,uint32_t num_temps,uint32_t * count)281 pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
282 {
283 uint32_t temp;
284
285 /* Work out starting temp number. For even number of temps, start on a
286 * 64-bit boundary.
287 */
288 if (num_temps & 1)
289 temp = *next_temp;
290 else
291 temp = (*next_temp + 1) & ~1;
292
293 /* Update the count with the number of temps actually allocated. */
294 *count += temp + num_temps - *next_temp;
295
296 /* Move the next temp pointer. */
297 *next_temp = temp + num_temps;
298
299 assert((temp + num_temps) <=
300 (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
301
302 return temp;
303 }
304
305 /**
306 * Write a 32-bit constant indexed by the long range.
307 *
308 * \param data_block Pointer to data block to write to.
309 * \param index Index within the data to write to.
310 * \param dword The 32-bit constant to write.
311 */
312 static void
pvr_pds_write_constant32(uint32_t * data_block,uint32_t index,uint32_t dword0)313 pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
314 {
315 /* Check range. */
316 assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
317 PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));
318
319 data_block[index + 0] = dword0;
320
321 PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
322 }
323
324 /**
325 * Write a 64-bit constant indexed by the long range.
326 *
327 * \param data_block Pointer to data block to write to.
328 * \param index Index within the data to write to.
329 * \param dword0 Lower half of the 64 bit constant.
330 * \param dword1 Upper half of the 64 bit constant.
331 */
pvr_pds_write_constant64(uint32_t * data_block,uint32_t index,uint32_t dword0,uint32_t dword1)332 static void pvr_pds_write_constant64(uint32_t *data_block,
333 uint32_t index,
334 uint32_t dword0,
335 uint32_t dword1)
336 {
337 /* Has to be on 64 bit boundary. */
338 assert((index & 1) == 0);
339
340 /* Check range. */
341 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
342 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
343
344 data_block[index + 0] = dword0;
345 data_block[index + 1] = dword1;
346
347 PVR_PDS_PRINT_DATA("WriteConstant64",
348 ((uint64_t)dword0 << 32) | (uint64_t)dword1,
349 index);
350 }
351
352 /**
353 * Write a 64-bit constant from a single wide word indexed by the long-range
354 * number.
355 *
356 * \param data_block Pointer to data block to write to.
357 * \param index Index within the data to write to.
358 * \param word The 64-bit constant to write.
359 */
360
361 static void
pvr_pds_write_wide_constant(uint32_t * data_block,uint32_t index,uint64_t word)362 pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
363 {
364 /* Has to be on 64 bit boundary. */
365 assert((index & 1) == 0);
366
367 /* Check range. */
368 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
369 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
370
371 data_block[index + 0] = L32(word);
372 data_block[index + 1] = H32(word);
373
374 PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
375 }
376
pvr_pds_write_dma_address(uint32_t * data_block,uint32_t index,uint64_t address,bool coherent,const struct pvr_device_info * dev_info)377 static void pvr_pds_write_dma_address(uint32_t *data_block,
378 uint32_t index,
379 uint64_t address,
380 bool coherent,
381 const struct pvr_device_info *dev_info)
382 {
383 /* Has to be on 64 bit boundary. */
384 assert((index & 1) == 0);
385
386 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
387 address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
388
389 /* Check range. */
390 assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
391 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
392
393 data_block[index + 0] = L32(address);
394 data_block[index + 1] = H32(address);
395
396 PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
397 }
398
/**
 * External API to append a 64-bit constant to an existing data segment
 * allocation.
 *
 * The constant is allocated starting from the current data size, so it lands
 * on the next 64-bit aligned slot at or after *data_size.
 *
 * \param constants Pointer to start of data segment.
 * \param constant_value Value to write to constant.
 * \param data_size The number of constants allocated; increased by the number
 *                  of dwords consumed, including any alignment padding.
 * \returns The index at which the constant was written.
 */
uint32_t pvr_pds_append_constant64(uint32_t *constants,
                                   uint64_t constant_value,
                                   uint32_t *data_size)
{
   /* Allocation continues from the end of what is already in use. */
   uint32_t allocation_cursor = *data_size;
   const uint32_t slot =
      pvr_pds_get_constants(&allocation_cursor, 2, data_size);

   pvr_pds_write_wide_constant(constants, slot, constant_value);

   return slot;
}
421
pvr_pds_pixel_shader_sa_initialize(struct pvr_pds_pixel_shader_sa_program * program)422 void pvr_pds_pixel_shader_sa_initialize(
423 struct pvr_pds_pixel_shader_sa_program *program)
424 {
425 memset(program, 0, sizeof(*program));
426 }
427
428 /**
429 * Encode a DMA burst.
430 *
431 * \param dma_control DMA control words.
432 * \param dma_address DMA address.
433 * \param dest_offset Destination offset in the attribute.
434 * \param dma_size The size of the DMA in words.
435 * \param src_address Source address for the burst.
436 * \param last Last DMA in program.
437 * \param dev_info PVR device info structure.
438 * \returns The number of DMA transfers required.
439 */
pvr_pds_encode_dma_burst(uint32_t * dma_control,uint64_t * dma_address,uint32_t dest_offset,uint32_t dma_size,uint64_t src_address,bool last,const struct pvr_device_info * dev_info)440 uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
441 uint64_t *dma_address,
442 uint32_t dest_offset,
443 uint32_t dma_size,
444 uint64_t src_address,
445 bool last,
446 const struct pvr_device_info *dev_info)
447 {
448 dma_control[0] = dma_size
449 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
450 dma_control[0] |= dest_offset
451 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
452
453 dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
454 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
455
456 if (last)
457 dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
458
459 dma_address[0] = src_address;
460 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
461 dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
462
463 /* Force to 1 DMA. */
464 return 1;
465 }
466
467 /* FIXME: use the csbgen interface and pvr_csb_pack.
468 * FIXME: use bool for phase_rate_change.
469 */
470 /**
471 * Sets up the USC control words for a DOUTU.
472 *
473 * \param usc_task_control USC task control structure to be setup.
474 * \param execution_address USC execution virtual address.
475 * \param usc_temps Number of USC temps.
476 * \param sample_rate Sample rate for the DOUTU.
477 * \param phase_rate_change Phase rate change for the DOUTU.
478 */
pvr_pds_setup_doutu(struct pvr_pds_usc_task_control * usc_task_control,uint64_t execution_address,uint32_t usc_temps,uint32_t sample_rate,bool phase_rate_change)479 void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
480 uint64_t execution_address,
481 uint32_t usc_temps,
482 uint32_t sample_rate,
483 bool phase_rate_change)
484 {
485 usc_task_control->src0 = UINT64_C(0);
486
487 /* Set the execution address. */
488 pvr_set_usc_execution_address64(&(usc_task_control->src0),
489 execution_address);
490
491 if (usc_temps > 0) {
492 /* Temps are allocated in blocks of 4 dwords. */
493 usc_temps =
494 DIV_ROUND_UP(usc_temps,
495 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
496
497 /* Check for losing temps due to too many requested. */
498 assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
499 usc_temps);
500
501 usc_task_control->src0 |=
502 ((uint64_t)(usc_temps &
503 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
504 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
505 }
506
507 if (sample_rate > 0) {
508 usc_task_control->src0 |=
509 ((uint64_t)sample_rate)
510 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
511 }
512
513 if (phase_rate_change) {
514 usc_task_control->src0 |=
515 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
516 }
517 }
518
519 /**
520 * Generates the PDS pixel event program.
521 *
522 * \param program Pointer to the PDS pixel event program.
523 * \param buffer Pointer to the buffer for the program.
524 * \param gen_mode Generate either a data segment or code segment.
525 * \param dev_info PVR device info structure.
526 * \returns Pointer to just beyond the buffer for the program.
527 */
528 uint32_t *
pvr_pds_generate_pixel_event(struct pvr_pds_event_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)529 pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
530 uint32_t *restrict buffer,
531 enum pvr_pds_generate_mode gen_mode,
532 const struct pvr_device_info *dev_info)
533 {
534 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
535 uint32_t *constants = buffer;
536
537 uint32_t data_size = 0;
538
539 /* Copy the DMA control words and USC task control words to constants, then
540 * arrange them so that the 64-bit words are together followed by the 32-bit
541 * words.
542 */
543 uint32_t control_constant =
544 pvr_pds_get_constants(&next_constant, 2, &data_size);
545 uint32_t emit_constant =
546 pvr_pds_get_constants(&next_constant,
547 (2 * program->num_emit_word_pairs),
548 &data_size);
549
550 uint32_t control_word_constant =
551 pvr_pds_get_constants(&next_constant,
552 program->num_emit_word_pairs,
553 &data_size);
554
555 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
556 /* Src0 for DOUTU. */
557 pvr_pds_write_wide_constant(buffer,
558 control_constant,
559 program->task_control.src0); /* DOUTU */
560 /* 64-bit Src0. */
561
562 /* Emit words for end of tile program. */
563 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
564 pvr_pds_write_constant64(constants,
565 emit_constant + (2 * i),
566 program->emit_words[(2 * i) + 0],
567 program->emit_words[(2 * i) + 1]);
568 }
569
570 /* Control words. */
571 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
572 uint32_t doutw = pvr_pds_encode_doutw_src1(
573 (2 * i),
574 PVR_PDS_DOUTW_LOWER64,
575 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
576 false,
577 dev_info);
578
579 if (i == (program->num_emit_word_pairs - 1))
580 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
581
582 pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
583 }
584 }
585
586 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
587 /* DOUTW the state into the shared register. */
588 for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
589 *buffer++ = pvr_pds_encode_doutw64(
590 /* cc */ 0,
591 /* END */ 0,
592 /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
593 /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0
594 */
595 }
596
597 /* Kick the USC. */
598 *buffer++ = pvr_pds_encode_doutu(
599 /* cc */ 0,
600 /* END */ 1,
601 /* SRC0 */ control_constant >> 1);
602 }
603
604 uint32_t code_size = 1 + program->num_emit_word_pairs;
605
606 /* Save the data segment Pointer and size. */
607 program->data_segment = constants;
608 program->data_size = data_size;
609 program->code_size = code_size;
610
611 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
612 return (constants + next_constant);
613
614 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
615 return buffer;
616
617 return NULL;
618 }
619
620 /**
621 * Checks if any of the vertex streams contains instance data.
622 *
623 * \param streams Streams contained in the vertex shader.
624 * \param num_streams Number of vertex streams.
625 * \returns true if one or more of the given vertex streams contains
626 * instance data, otherwise false.
627 */
pvr_pds_vertex_streams_contains_instance_data(const struct pvr_pds_vertex_stream * streams,uint32_t num_streams)628 static bool pvr_pds_vertex_streams_contains_instance_data(
629 const struct pvr_pds_vertex_stream *streams,
630 uint32_t num_streams)
631 {
632 for (uint32_t i = 0; i < num_streams; i++) {
633 const struct pvr_pds_vertex_stream *vertex_stream = &streams[i];
634 if (vertex_stream->instance_data)
635 return true;
636 }
637
638 return false;
639 }
640
/**
 * Allocates one or two constants for the PDS vertex shader, where the first
 * (num_backs * 8) constants are divided into banks.
 *
 * Once the next constant is at or beyond the banked region the request is
 * forwarded to pvr_pds_get_constants(). Inside the banked region *count is
 * only updated in the one case noted below.
 *
 * \param num_backs Number of banks; the banked region spans num_backs << 3
 *                  constants.
 * \param next_constant Pointer to the next free constant; updated.
 * \param num_constants The number of constants required; must be 1 or 2.
 * \param count Running total of allocated constants.
 * \returns The index of the allocated constant.
 */
static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs,
                                                 uint32_t *next_constant,
                                                 uint32_t num_constants,
                                                 uint32_t *count)
{
   const uint32_t banked_region_end = num_backs << 3;
   uint32_t slot;

   assert(num_constants == 1 || num_constants == 2);

   /* Past the banked region: fall back to the plain allocator. */
   if (*next_constant >= banked_region_end)
      return pvr_pds_get_constants(next_constant, num_constants, count);

   slot = *next_constant;

   /* On a multiple of 8: take this slot. A single constant advances by one;
    * a pair advances by 8.
    */
   if ((slot % 8) == 0) {
      *next_constant = slot + ((num_constants == 1) ? 1 : 8);
      return slot;
   }

   /* Off a multiple of 8, single constant: take this slot and advance by 7. */
   if (num_constants == 1) {
      *next_constant = slot + 7;
      return slot;
   }

   /* Off a multiple of 8, pair: skip ahead by 7 and take that slot instead. */
   slot += 7;

   if (slot >= banked_region_end) {
      /* The pair landed outside the banked region, so it is counted. */
      *next_constant = slot + 2;
      *count += 2;
   } else {
      *next_constant = slot + 8;
   }

   return slot;
}
679
680 /**
 * Generates a PDS program that loads USC vertex inputs from one or more
 * vertex buffers, each potentially containing multiple elements, and then
 * issues a DOUTU to execute the USC.
684 *
685 * \param program Pointer to the description of the program which should be
686 * generated.
687 * \param buffer Pointer to buffer that receives the output of this function.
688 * Will either be the data segment or code segment depending on
689 * gen_mode.
690 * \param gen_mode Which part to generate, either data segment or
691 * code segment. If PDS_GENERATE_SIZES is specified, nothing is
692 * written, but size information in program is updated.
693 * \param dev_info PVR device info structure.
694 * \returns Pointer to just beyond the buffer for the data - i.e the value
695 * of the buffer after writing its contents.
696 */
697 /* FIXME: Implement PDS_GENERATE_CODEDATA_SEGMENTS? */
698 uint32_t *
pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)699 pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
700 uint32_t *restrict buffer,
701 enum pvr_pds_generate_mode gen_mode,
702 const struct pvr_device_info *dev_info)
703 {
704 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
705 uint32_t next_stream_constant;
706 uint32_t next_temp;
707 uint32_t usc_control_constant64;
708 uint32_t stride_constant32 = 0;
709 uint32_t dma_address_constant64 = 0;
710 uint32_t dma_control_constant64;
711 uint32_t multiplier_constant32 = 0;
712 uint32_t base_instance_const32 = 0;
713
714 uint32_t temp = 0;
715 uint32_t index_temp64 = 0;
716 uint32_t num_vertices_temp64 = 0;
717 uint32_t pre_index_temp = (uint32_t)(-1);
718 bool first_ddmadt = true;
719 uint32_t input_register0;
720 uint32_t input_register1;
721 uint32_t input_register2;
722
723 struct pvr_pds_vertex_stream *vertex_stream;
724 struct pvr_pds_vertex_element *vertex_element;
725 uint32_t shift_2s_comp;
726
727 uint32_t data_size = 0;
728 uint32_t code_size = 0;
729 uint32_t temps_used = 0;
730
731 bool direct_writes_needed = false;
732
733 uint32_t consts_size = 0;
734 uint32_t vertex_id_control_word_const32 = 0;
735 uint32_t instance_id_control_word_const32 = 0;
736 uint32_t instance_id_modifier_word_const32 = 0;
737 uint32_t geometry_id_control_word_const64 = 0;
738 uint32_t empty_dma_control_constant64 = 0;
739
740 bool any_instanced_stream =
741 pvr_pds_vertex_streams_contains_instance_data(program->streams,
742 program->num_streams);
743
744 uint32_t base_instance_register = 0;
745 uint32_t ddmadt_enables = 0;
746
747 bool issue_empty_ddmad = false;
748 uint32_t last_stream_index = program->num_streams - 1;
749 bool current_p0 = false;
750 uint32_t skip_stream_flag = 0;
751
752 /* Generate the PDS vertex shader data. */
753
754 #if MESA_DEBUG
755 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
756 for (uint32_t i = 0; i < program->data_size; i++)
757 buffer[i] = 0xDEADBEEF;
758 }
759 #endif
760
761 /* Generate the PDS vertex shader program */
762 next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
763 /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
764 input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
765 /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
766 input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
767
768 if (program->iterate_remap_id)
769 input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
770 else
771 input_register2 = 0; /* Not used, but need to silence the compiler. */
772
773 /* Generate the PDS vertex shader code. The constants in the data block are
774 * arranged as follows:
775 *
    * 64 bit bank 0    64 bit bank 1        64 bit bank 2  64 bit bank 3
    * Not used (tmps)  Stride | Multiplier  Address        Control
778 */
779
780 /* Find out how many constants are needed by streams. */
781 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
782 pvr_pds_get_constants(&next_constant,
783 8 * program->streams[stream].num_elements,
784 &consts_size);
785 }
786
787 /* If there are no vertex streams allocate the first bank for USC Code
788 * Address.
789 */
790 if (consts_size == 0)
791 pvr_pds_get_constants(&next_constant, 2, &consts_size);
792 else
793 next_constant = 8;
794
795 direct_writes_needed = program->iterate_instance_id ||
796 program->iterate_vtx_id || program->iterate_remap_id;
797
798 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
799 /* Evaluate what config of DDMAD should be used for each stream. */
800 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
801 vertex_stream = &program->streams[stream];
802
803 if (vertex_stream->use_ddmadt) {
804 ddmadt_enables |= (1 << stream);
805
806 /* The condition for index value is:
807 * index * stride + size <= bufferSize (all in unit of byte)
808 */
809 if (vertex_stream->stride == 0) {
810 if (vertex_stream->elements[0].size <=
811 vertex_stream->buffer_size_in_bytes) {
812 /* index can be any value -> no need to use DDMADT. */
813 ddmadt_enables &= (~(1 << stream));
814 } else {
815 /* No index works -> no need to issue DDMAD instruction.
816 */
817 skip_stream_flag |= (1 << stream);
818 }
819 } else {
820 /* index * stride + size <= bufferSize
821 *
822 * can be converted to:
823 * index <= (bufferSize - size) / stride
824 *
825 * where maximum index is:
826 * integer((bufferSize - size) / stride).
827 */
828 if (vertex_stream->buffer_size_in_bytes <
829 vertex_stream->elements[0].size) {
830 /* No index works -> no need to issue DDMAD instruction.
831 */
832 skip_stream_flag |= (1 << stream);
833 } else {
834 uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
835 vertex_stream->elements[0].size) /
836 vertex_stream->stride;
837 if (max_index == 0xFFFFFFFFu) {
838 /* No need to use DDMADT as all possible indices can
839 * pass the test.
840 */
841 ddmadt_enables &= (~(1 << stream));
842 } else {
843 /* In this case, test condition can be changed to
844 * index < max_index + 1.
845 */
846 program->streams[stream].num_vertices =
847 pvr_pds_get_bank_based_constants(program->num_streams,
848 &next_constant,
849 1,
850 &consts_size);
851
852 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
853 pvr_pds_write_constant32(
854 buffer,
855 program->streams[stream].num_vertices,
856 max_index + 1);
857 }
858 }
859 }
860 }
861 }
862
863 if ((skip_stream_flag & (1 << stream)) == 0) {
864 issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
865 last_stream_index = stream;
866 }
867 }
868 } else {
869 if (program->num_streams > 0 &&
870 program->streams[program->num_streams - 1].use_ddmadt) {
871 issue_empty_ddmad = true;
872 }
873 }
874
875 if (direct_writes_needed)
876 issue_empty_ddmad = false;
877
878 if (issue_empty_ddmad) {
      /* An empty DMA control const (DMA size = 0) is required in case the
       * last DDMAD is predicated out and last flag does not have any usage.
881 */
882 empty_dma_control_constant64 =
883 pvr_pds_get_bank_based_constants(program->num_streams,
884 &next_constant,
885 2,
886 &consts_size);
887 }
888
889 /* Assign constants for non stream or base instance if there is any
890 * instanced stream.
891 */
892 if (direct_writes_needed || any_instanced_stream ||
893 program->instance_id_modifier) {
894 if (program->iterate_vtx_id) {
895 vertex_id_control_word_const32 =
896 pvr_pds_get_bank_based_constants(program->num_streams,
897 &next_constant,
898 1,
899 &consts_size);
900 }
901
902 if (program->iterate_instance_id || program->instance_id_modifier) {
903 if (program->instance_id_modifier == 0) {
904 instance_id_control_word_const32 =
905 pvr_pds_get_bank_based_constants(program->num_streams,
906 &next_constant,
907 1,
908 &consts_size);
909 } else {
910 instance_id_modifier_word_const32 =
911 pvr_pds_get_bank_based_constants(program->num_streams,
912 &next_constant,
913 1,
914 &consts_size);
915 if ((instance_id_modifier_word_const32 % 2) == 0) {
916 instance_id_control_word_const32 =
917 pvr_pds_get_bank_based_constants(program->num_streams,
918 &next_constant,
919 1,
920 &consts_size);
921 } else {
922 instance_id_control_word_const32 =
923 instance_id_modifier_word_const32;
924 instance_id_modifier_word_const32 =
925 pvr_pds_get_bank_based_constants(program->num_streams,
926 &next_constant,
927 1,
928 &consts_size);
929 }
930 }
931 }
932
933 if (program->base_instance != 0) {
934 base_instance_const32 =
935 pvr_pds_get_bank_based_constants(program->num_streams,
936 &next_constant,
937 1,
938 &consts_size);
939 }
940
941 if (program->iterate_remap_id) {
942 geometry_id_control_word_const64 =
943 pvr_pds_get_bank_based_constants(program->num_streams,
944 &next_constant,
945 2,
946 &consts_size);
947 }
948 }
949
950 if (program->instance_id_modifier != 0) {
951 /* This instanceID modifier is used when a draw array instanced call
952 * sourcing from client data cannot fit into vertex buffer and needs to
953 * be broken down into several draw calls.
954 */
955
956 code_size += 1;
957
958 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
959 pvr_pds_write_constant32(buffer,
960 instance_id_modifier_word_const32,
961 program->instance_id_modifier);
962 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
963 *buffer++ = pvr_pds_inst_encode_add32(
964 /* cc */ 0x0,
965 /* ALUM */ 0, /* Unsigned */
966 /* SNA */ 0, /* Add */
967 /* SRC0 32b */ instance_id_modifier_word_const32,
968 /* SRC1 32b */ input_register1,
969 /* DST 32b */ input_register1);
970 }
971 }
972
973 /* Adjust instanceID if necessary. */
974 if (any_instanced_stream || program->iterate_instance_id) {
975 if (program->base_instance != 0) {
976 assert(!program->draw_indirect);
977
978 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
979 pvr_pds_write_constant32(buffer,
980 base_instance_const32,
981 program->base_instance);
982 }
983
984 base_instance_register = base_instance_const32;
985 }
986
987 if (program->draw_indirect) {
988 assert((program->instance_id_modifier == 0) &&
989 (program->base_instance == 0));
990
991 base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
992 }
993 }
994
995 next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
996 usc_control_constant64 =
997 pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
998
999 for (uint32_t stream = 0; stream < program->num_streams; stream++) {
1000 bool instance_data_with_base_instance;
1001
1002 if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1003 ((skip_stream_flag & (1 << stream)) != 0)) {
1004 continue;
1005 }
1006
1007 vertex_stream = &program->streams[stream];
1008
1009 instance_data_with_base_instance =
1010 ((vertex_stream->instance_data) &&
1011 ((program->base_instance > 0) || (program->draw_indirect)));
1012
1013 /* Get all 8 32-bit constants at once, only 6 for first stream due to
1014 * USC constants.
1015 */
1016 if (stream == 0) {
1017 stride_constant32 =
1018 pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
1019 } else {
1020 next_constant =
1021 pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1022
1023 /* Skip bank 0. */
1024 stride_constant32 = next_constant + 2;
1025 }
1026
1027 multiplier_constant32 = stride_constant32 + 1;
1028
1029 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1030 pvr_pds_write_constant32(buffer,
1031 stride_constant32,
1032 vertex_stream->stride);
1033
1034 /* Vertex stream frequency multiplier. */
1035 if (vertex_stream->multiplier)
1036 pvr_pds_write_constant32(buffer,
1037 multiplier_constant32,
1038 vertex_stream->multiplier);
1039 }
1040
1041 /* Update the code size count and temps count for the above code
1042 * segment.
1043 */
1044 if (vertex_stream->current_state) {
1045 code_size += 1;
1046 temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
1047 } else {
1048 unsigned int num_temps_required = 0;
1049
1050 if (vertex_stream->multiplier) {
1051 num_temps_required += 2;
1052 code_size += 3;
1053
1054 if (vertex_stream->shift) {
1055 code_size += 1;
1056
1057 if ((int32_t)vertex_stream->shift > 0)
1058 code_size += 1;
1059 }
1060 } else if (vertex_stream->shift) {
1061 code_size += 1;
1062 num_temps_required += 1;
1063 } else if (instance_data_with_base_instance) {
1064 num_temps_required += 1;
1065 }
1066
1067 if (num_temps_required != 0) {
1068 temp = pvr_pds_get_temps(&next_temp,
1069 num_temps_required,
1070 &temps_used); /* 64-bit */
1071 } else {
1072 temp = vertex_stream->instance_data ? input_register1
1073 : input_register0;
1074 }
1075
1076 if (instance_data_with_base_instance)
1077 code_size += 1;
1078 }
1079
1080 /* The real code segment. */
1081 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1082 /* If it's current state stream, then index = 0 always. */
1083 if (vertex_stream->current_state) {
1084 /* Put zero in temp. */
1085 *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1086 } else if (vertex_stream->multiplier) {
1087 /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
1088 * new: Iout = (Iin * Multiplier) >> (shift+31)
1089 */
1090
1091 /* Put zero in temp. Need zero for add part of the following
1092 * MAD. MAD source is 64 bit, so need two LIMMs.
1093 */
1094 *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
1095 /* Put zero in temp. Need zero for add part of the following
1096 * MAD.
1097 */
1098 *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
1099
1100 /* old: (Iin * (Multiplier+2^24))
1101 * new: (Iin * Multiplier)
1102 */
1103 *buffer++ = pvr_rogue_inst_encode_mad(
1104 0, /* Sign of add is positive. */
1105 0, /* Unsigned ALU mode */
1106 0, /* Unconditional */
1107 multiplier_constant32,
1108 vertex_stream->instance_data ? input_register1 : input_register0,
1109 temp / 2,
1110 temp / 2);
1111
1112 if (vertex_stream->shift) {
1113 int32_t shift = (int32_t)vertex_stream->shift;
1114
1115 /* new: >> (shift + 31) */
1116 shift += 31;
1117 shift *= -1;
1118
1119 if (shift < -31) {
1120 /* >> (31) */
1121 shift_2s_comp = 0xFFFE1;
1122 *buffer++ = pvr_pds_inst_encode_sftlp64(
1123 /* cc */ 0,
1124 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1125 /* IM */ 1, /* enable immediate */
1126 /* SRC0 */ temp / 2,
1127 /* SRC1 */ input_register0, /* This won't be used in
1128 * a shift operation.
1129 */
1130 /* SRC2 (Shift) */ shift_2s_comp,
1131 /* DST */ temp / 2);
1132 shift += 31;
1133 }
1134
1135 /* old: >> (Shift+24)
1136 * new: >> (shift + 31)
1137 */
1138 shift_2s_comp = *((uint32_t *)&shift);
1139 *buffer++ = pvr_pds_inst_encode_sftlp64(
1140 /* cc */ 0,
1141 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1142 /* IM */ 1, /*enable immediate */
1143 /* SRC0 */ temp / 2,
1144 /* SRC1 */ input_register0, /* This won't be used in
1145 * a shift operation.
1146 */
1147 /* SRC2 (Shift) */ shift_2s_comp,
1148 /* DST */ temp / 2);
1149 }
1150
1151 if (instance_data_with_base_instance) {
1152 *buffer++ =
1153 pvr_pds_inst_encode_add32(0, /* cc */
1154 0, /* ALNUM */
1155 0, /* SNA */
1156 base_instance_register, /* src0
1157 */
1158 temp, /* src1 */
1159 temp /* dst */
1160 );
1161 }
1162 } else { /* NOT vertex_stream->multiplier */
1163 if (vertex_stream->shift) {
1164 /* Shift Index/InstanceNum Right by shift bits. Put result
1165 * in a Temp.
1166 */
1167
1168 /* 2's complement of shift as this will be a right shift. */
1169 shift_2s_comp = ~(vertex_stream->shift) + 1;
1170
1171 *buffer++ = pvr_pds_inst_encode_sftlp32(
1172 /* IM */ 1, /* enable immediate. */
1173 /* cc */ 0,
1174 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1175 /* SRC0 */ vertex_stream->instance_data ? input_register1
1176 : input_register0,
1177 /* SRC1 */ input_register0, /* This won't be used in
1178 * a shift operation.
1179 */
1180 /* SRC2 (Shift) */ shift_2s_comp,
1181 /* DST */ temp);
1182
1183 if (instance_data_with_base_instance) {
1184 *buffer++ =
1185 pvr_pds_inst_encode_add32(0, /* cc */
1186 0, /* ALNUM */
1187 0, /* SNA */
1188 base_instance_register, /* src0
1189 */
1190 temp, /* src1 */
1191 temp /* dst */
1192 );
1193 }
1194 } else {
1195 if (instance_data_with_base_instance) {
1196 *buffer++ =
1197 pvr_pds_inst_encode_add32(0, /* cc */
1198 0, /* ALNUM */
1199 0, /* SNA */
1200 base_instance_register, /* src0
1201 */
1202 input_register1, /* src1 */
1203 temp /* dst */
1204 );
1205 } else {
1206 /* If the shift instruction doesn't happen, use the IR
1207 * directly into the following MAD.
1208 */
1209 temp = vertex_stream->instance_data ? input_register1
1210 : input_register0;
1211 }
1212 }
1213 }
1214 }
1215
1216 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1217 if (vertex_stream->use_ddmadt)
1218 ddmadt_enables |= (1 << stream);
1219 } else {
1220 if ((ddmadt_enables & (1 << stream)) != 0) {
1221 /* Emulate what DDMADT does for range checking. */
1222 if (first_ddmadt) {
1223 /* Get an 64 bits temp such that cmp current index with
1224 * allowed vertex number can work.
1225 */
1226 index_temp64 =
1227 pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1228 */
1229 num_vertices_temp64 =
1230 pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
1231 */
1232
1233 index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1234 num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
1235
1236 code_size += 3;
1237 current_p0 = true;
1238 }
1239
1240 code_size += (temp == pre_index_temp ? 1 : 2);
1241
1242 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1243 if (first_ddmadt) {
1244 /* Set predicate to be P0. */
1245 *buffer++ = pvr_pds_encode_bra(
1246 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1247 */
1248 0, /* Neg */
1249 PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
1250 */
1251 1); /* Addr */
1252
1253 *buffer++ =
1254 pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
1255 *buffer++ =
1256 pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
1257 }
1258
1259 if (temp != pre_index_temp) {
1260 *buffer++ = pvr_pds_inst_encode_sftlp32(
1261 /* IM */ 1, /* enable immediate. */
1262 /* cc */ 0,
1263 /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
1264 /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
1265 /* SRC1 */ 0,
1266 /* SRC2 (Shift) */ 0,
1267 /* DST */ index_temp64);
1268 }
1269
1270 *buffer++ = pvr_pds_inst_encode_sftlp32(
1271 /* IM */ 1, /* enable immediate. */
1272 /* cc */ 0,
1273 /* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
1274 /* SRC0 */ num_vertices_temp64 + 1,
1275 /* SRC1 */ vertex_stream->num_vertices,
1276 /* SRC2 (Shift) */ 0,
1277 /* DST */ num_vertices_temp64);
1278 }
1279
1280 first_ddmadt = false;
1281
1282 pre_index_temp = temp;
1283 }
1284 }
1285
1286 /* Process the elements in the stream. */
1287 for (uint32_t element = 0; element < vertex_stream->num_elements;
1288 element++) {
1289 bool terminate = false;
1290
1291 vertex_element = &vertex_stream->elements[element];
1292 /* Check if last DDMAD needs terminate or not. */
1293 if ((element == (vertex_stream->num_elements - 1)) &&
1294 (stream == last_stream_index)) {
1295 terminate = !issue_empty_ddmad && !direct_writes_needed;
1296 }
1297
1298 /* Get a new set of constants for this element. */
1299 if (element) {
1300 /* Get all 8 32 bit constants at once. */
1301 next_constant =
1302 pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
1303 }
1304
1305 dma_address_constant64 = next_constant + 4;
1306 dma_control_constant64 = dma_address_constant64 + 2;
1307
1308 if (vertex_element->component_size == 0) {
1309 /* Standard DMA.
1310 *
1311 * Write the DMA transfer control words into the PDS data
1312 * section.
1313 *
1314 * DMA Address is 40-bit.
1315 */
1316
1317 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1318 uint32_t dma_control_word;
1319 uint64_t dma_control_word64 = 0;
1320 uint32_t dma_size;
1321
1322 /* Write the address to the constant. */
1323 pvr_pds_write_dma_address(buffer,
1324 dma_address_constant64,
1325 vertex_stream->address +
1326 (uint64_t)vertex_element->offset,
1327 false,
1328 dev_info);
1329 {
1330 if (program->stream_patch_offsets) {
1331 program
1332 ->stream_patch_offsets[program->num_stream_patches++] =
1333 (stream << 16) | (dma_address_constant64 >> 1);
1334 }
1335 }
1336
1337 /* Size is in bytes - round up to nearest 32 bit word. */
1338 dma_size =
1339 (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
1340 PVR_PDS_DWORD_SHIFT;
1341
1342 assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
1343
1344 /* Set up the dma transfer control word. */
1345 dma_control_word =
1346 dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1347
1348 dma_control_word |=
1349 vertex_element->reg
1350 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1351
1352 dma_control_word |=
1353 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1354 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1355
1356 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1357 if ((ddmadt_enables & (1 << stream)) != 0) {
1358 assert(
1359 ((((uint64_t)vertex_stream->buffer_size_in_bytes
1360 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1361 ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
1362 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
1363 (uint64_t)vertex_stream->buffer_size_in_bytes);
1364 dma_control_word64 =
1365 (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
1366 (((uint64_t)vertex_stream->buffer_size_in_bytes
1367 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
1368 ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
1369 }
1370 }
1371 /* If this is the last dma then also set the last flag. */
1372 if (terminate) {
1373 dma_control_word |=
1374 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1375 }
1376
1377 /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1378 * spec.
1379 */
1380 pvr_pds_write_wide_constant(buffer,
1381 dma_control_constant64,
1382 dma_control_word64 |
1383 (uint64_t)dma_control_word);
1384 }
1385
1386 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1387 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1388 if ((ddmadt_enables & (1 << stream)) != 0) {
1389 *buffer++ = pvr_pds_inst_encode_cmp(
1390 0, /* cc enable */
1391 PVR_ROGUE_PDSINST_COP_LT, /* Operation */
1392 index_temp64 >> 1, /* SRC0 (REGS64TP) */
1393 (num_vertices_temp64 >> 1) +
1394 PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
1395 (REGS64)
1396 */
1397 }
1398 }
1399 /* Multiply by the vertex stream stride and add the base
1400 * followed by a DOUTD.
1401 *
1402 * dmad32 (C0 * T0) + C1, C2
1403 * src0 = stride src1 = index src2 = baseaddr src3 =
1404 * doutd part
1405 */
1406
1407 uint32_t cc;
1408 if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1409 cc = 0;
1410 else
1411 cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
1412
1413 *buffer++ = pvr_pds_inst_encode_ddmad(
1414 /* cc */ cc,
1415 /* END */ 0,
1416 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1417 /* SRC1 */ temp, /* Index 32-bit*/
1418 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1419 * Address
1420 * +
1421 * Offset
1422 */
1423 /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1424 * Transfer
1425 * Control
1426 * Word.
1427 */
1428 );
1429 }
1430
1431 if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
1432 ((ddmadt_enables & (1 << stream)) != 0)) {
1433 code_size += 1;
1434 }
1435 code_size += 1;
1436 } else {
1437 /* Repeat DMA.
1438 *
1439 * Write the DMA transfer control words into the PDS data
1440 * section.
1441 *
1442 * DMA address is 40-bit.
1443 */
1444
1445 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1446 uint32_t dma_control_word;
1447
1448 /* Write the address to the constant. */
1449 pvr_pds_write_dma_address(buffer,
1450 dma_address_constant64,
1451 vertex_stream->address +
1452 (uint64_t)vertex_element->offset,
1453 false,
1454 dev_info);
1455
1456 /* Set up the DMA transfer control word. */
1457 dma_control_word =
1458 vertex_element->size
1459 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
1460
1461 dma_control_word |=
1462 vertex_element->reg
1463 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
1464
1465 switch (vertex_element->component_size) {
1466 case 4: {
1467 dma_control_word |=
1468 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
1469 break;
1470 }
1471 case 3: {
1472 dma_control_word |=
1473 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
1474 break;
1475 }
1476 case 2: {
1477 dma_control_word |=
1478 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
1479 break;
1480 }
1481 default: {
1482 dma_control_word |=
1483 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
1484 break;
1485 }
1486 }
1487
1488 dma_control_word |=
1489 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
1490
1491 dma_control_word |=
1492 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
1493 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
1494
1495 /* If this is the last dma then also set the last flag. */
1496 if (terminate) {
1497 dma_control_word |=
1498 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
1499 }
1500
1501 /* Write the 32-Bit SRC3 word to a 64-bit constant as per
1502 * spec.
1503 */
1504 pvr_pds_write_wide_constant(buffer,
1505 dma_control_constant64,
1506 (uint64_t)dma_control_word);
1507 }
1508
1509 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1510 /* Multiply by the vertex stream stride and add the base
1511 * followed by a DOUTD.
1512 *
1513 * dmad32 (C0 * T0) + C1, C2
1514 * src0 = stride src1 = index src2 = baseaddr src3 =
1515 * doutd part
1516 */
1517 *buffer++ = pvr_pds_inst_encode_ddmad(
1518 /* cc */ 0,
1519 /* END */ 0,
1520 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1521 /* SRC1 */ temp, /* Index 32-bit*/
1522 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1523 * Address
1524 * +
1525 * Offset.
1526 */
1527 /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
1528 * Transfer
1529 * Control
1530 * Word.
1531 */
1532 );
1533 }
1534
1535 code_size += 1;
1536 } /* End of repeat DMA. */
1537 } /* Element loop */
1538 } /* Stream loop */
1539
1540 if (issue_empty_ddmad) {
1541 /* Issue an empty last DDMAD, always executed. */
1542 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1543 pvr_pds_write_wide_constant(
1544 buffer,
1545 empty_dma_control_constant64,
1546 PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
1547 }
1548
1549 code_size += 1;
1550
1551 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1552 *buffer++ = pvr_pds_inst_encode_ddmad(
1553 /* cc */ 0,
1554 /* END */ 0,
1555 /* SRC0 */ stride_constant32, /* Stride 32-bit*/
1556 /* SRC1 */ temp, /* Index 32-bit*/
1557 /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
1558 *Address +
1559 *Offset.
1560 */
1561 /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
1562 * Transfer
1563 * Control
1564 * Word.
1565 */
1566 );
1567 }
1568 }
1569
1570 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
1571 if (current_p0) {
1572 code_size += 1;
1573
1574 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1575 /* Revert predicate back to IF0 which is required by DOUTU. */
1576 *buffer++ =
1577 pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
1578 */
1579 0, /* Neg */
1580 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
1581 */
1582 1); /* Addr */
1583 }
1584 }
1585 }
1586 /* Send VertexID if requested. */
1587 if (program->iterate_vtx_id) {
1588 if (program->draw_indirect) {
1589 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1590 *buffer++ = pvr_pds_inst_encode_add32(
1591 /* cc */ 0x0,
1592 /* ALUM */ 0, /* Unsigned */
1593 /* SNA */ 1, /* Minus */
1594 /* SRC0 32b */ input_register0, /* vertexID */
1595 /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
1596 * vertexID.
1597 */
1598 /* DST 32b */ input_register0);
1599 }
1600
1601 code_size += 1;
1602 }
1603
1604 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1605 uint32_t doutw = pvr_pds_encode_doutw_src1(
1606 program->vtx_id_register,
1607 PVR_PDS_DOUTW_LOWER32,
1608 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1609 false,
1610 dev_info);
1611
1612 if (!program->iterate_instance_id && !program->iterate_remap_id)
1613 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1614
1615 pvr_pds_write_constant32(buffer,
1616 vertex_id_control_word_const32,
1617 doutw);
1618 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1619 *buffer++ = pvr_pds_encode_doutw64(
1620 /* cc */ 0,
1621 /* END */ 0,
1622 /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
1623 */
1624 /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
1625 }
1626
1627 code_size += 1;
1628 }
1629
1630 /* Send InstanceID if requested. */
1631 if (program->iterate_instance_id) {
1632 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1633 uint32_t doutw = pvr_pds_encode_doutw_src1(
1634 program->instance_id_register,
1635 PVR_PDS_DOUTW_UPPER32,
1636 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
1637 true,
1638 dev_info);
1639
1640 if (!program->iterate_remap_id)
1641 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1642
1643 pvr_pds_write_constant32(buffer,
1644 instance_id_control_word_const32,
1645 doutw);
1646 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1647 *buffer++ = pvr_pds_encode_doutw64(
1648 /* cc */ 0,
1649 /* END */ 0,
1650 /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
1651 /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
1652 }
1653
1654 code_size += 1;
1655 }
1656
1657 /* Send remapped index number to vi0. */
1658 if (program->iterate_remap_id) {
1659 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1660 uint32_t doutw = pvr_pds_encode_doutw_src1(
1661 0 /* vi0 */,
1662 PVR_PDS_DOUTW_LOWER32,
1663 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
1664 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
1665 false,
1666 dev_info);
1667
1668 pvr_pds_write_constant64(buffer,
1669 geometry_id_control_word_const64,
1670 doutw,
1671 0);
1672 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1673 *buffer++ = pvr_pds_encode_doutw64(
1674 /* cc */ 0,
1675 /* END */ 0,
1676 /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
1677 * Src1
1678 */
1679 /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
1680 }
1681
1682 code_size += 1;
1683 }
1684
1685 /* Copy the USC task control words to constants. */
1686 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1687 pvr_pds_write_wide_constant(buffer,
1688 usc_control_constant64,
1689 program->usc_task_control.src0); /* 64-bit
1690 * Src0
1691 */
1692 if (program->stream_patch_offsets) {
1693 /* USC TaskControl is always the first patch. */
1694 program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
1695 }
1696 }
1697
1698 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
1699 /* Conditionally (if last in task) issue the task to the USC
1700 * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
1701 */
1702
1703 *buffer++ = pvr_pds_encode_doutu(
1704 /* cc */ 1,
1705 /* END */ 1,
1706 /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
1707
1708 /* End the program if the Dout did not already end it. */
1709 *buffer++ = pvr_pds_inst_encode_halt(0);
1710 }
1711
1712 code_size += 2;
1713
1714 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1715 /* Set the data segment pointer and ensure we return 1 past the buffer
1716 * ptr.
1717 */
1718 program->data_segment = buffer;
1719
1720 buffer += consts_size;
1721 }
1722
1723 program->temps_used = temps_used;
1724 program->data_size = consts_size;
1725 program->code_size = code_size;
1726 program->ddmadt_enables = ddmadt_enables;
1727 if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
1728 program->skip_stream_flag = skip_stream_flag;
1729
1730 return buffer;
1731 }
1732
1733 /**
1734 * Generates a PDS program to load USC compute shader global/local/workgroup
1735 * sizes/ids and then a DOUTU to execute the USC.
1736 *
1737 * \param program Pointer to description of the program that should be
1738 * generated.
1739 * \param buffer Pointer to buffer that receives the output of this function.
1740 * This will be either the data segment, or the code depending on
1741 * gen_mode.
1742 * \param gen_mode Which part to generate, either data segment or code segment.
1743 * If PDS_GENERATE_SIZES is specified, nothing is written, but
1744 * size information in program is updated.
1745 * \param dev_info PVR device info struct.
1746 * \returns Pointer to just beyond the buffer for the data - i.e. the value of
1747 * the buffer after writing its contents.
1748 */
1749 uint32_t *
pvr_pds_compute_shader(struct pvr_pds_compute_shader_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)1750 pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
1751 uint32_t *restrict buffer,
1752 enum pvr_pds_generate_mode gen_mode,
1753 const struct pvr_device_info *dev_info)
1754 {
1755 uint32_t usc_control_constant64;
1756 uint32_t usc_control_constant64_coeff_update = 0;
1757 uint32_t zero_constant64 = 0;
1758
1759 uint32_t data_size = 0;
1760 uint32_t code_size = 0;
1761 uint32_t temps_used = 0;
1762 uint32_t doutw = 0;
1763
1764 uint32_t barrier_ctrl_word = 0;
1765 uint32_t barrier_ctrl_word2 = 0;
1766
1767 /* Even though there are 3 IDs for local and global we only need max one
1768 * DOUTW for local, and two for global.
1769 */
1770 uint32_t work_group_id_ctrl_words[2] = { 0 };
1771 uint32_t local_id_ctrl_word = 0;
1772 uint32_t local_input_register;
1773
1774 /* For the constant value to load into ptemp (SW fence). */
1775 uint64_t predicate_ld_src0_constant = 0;
1776 uint32_t cond_render_negate_constant = 0;
1777
1778 uint32_t cond_render_pred_temp;
1779 uint32_t cond_render_negate_temp;
1780
1781 /* 2x 64 bit registers that will mask out the Predicate load. */
1782 uint32_t cond_render_pred_mask_constant = 0;
1783
1784 #if MESA_DEBUG
1785 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1786 for (uint32_t j = 0; j < program->data_size; j++)
1787 buffer[j] = 0xDEADBEEF;
1788 }
1789 #endif
1790
1791 /* All the compute input registers are in temps. */
1792 temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
1793
1794 uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
1795
1796 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
1797
1798 if (program->kick_usc) {
1799 /* Copy the USC task control words to constants. */
1800 usc_control_constant64 =
1801 pvr_pds_get_constants(&next_constant, 2, &data_size);
1802 }
1803
1804 if (program->has_coefficient_update_task) {
1805 usc_control_constant64_coeff_update =
1806 pvr_pds_get_constants(&next_constant, 2, &data_size);
1807 }
1808
1809 if (program->conditional_render) {
1810 predicate_ld_src0_constant =
1811 pvr_pds_get_constants(&next_constant, 2, &data_size);
1812 cond_render_negate_constant =
1813 pvr_pds_get_constants(&next_constant, 2, &data_size);
1814 cond_render_pred_mask_constant =
1815 pvr_pds_get_constants(&next_constant, 4, &data_size);
1816
1817 /* LD will load a 64 bit value. */
1818 cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
1819 cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
1820
1821 program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
1822 program->cond_render_pred_temp = cond_render_pred_temp;
1823 }
1824
1825 if ((program->barrier_coefficient != PVR_PDS_REG_UNUSED) ||
1826 (program->clear_pds_barrier) ||
1827 (program->kick_usc && program->conditional_render)) {
1828 zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
1829 }
1830
1831 if (program->barrier_coefficient != PVR_PDS_REG_UNUSED) {
1832 barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1833 if (PVR_HAS_QUIRK(dev_info, 51210)) {
1834 barrier_ctrl_word2 =
1835 pvr_pds_get_constants(&next_constant, 1, &data_size);
1836 }
1837 }
1838
1839 if (program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED ||
1840 program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED) {
1841 work_group_id_ctrl_words[0] =
1842 pvr_pds_get_constants(&next_constant, 1, &data_size);
1843 }
1844
1845 if (program->work_group_input_regs[2] != PVR_PDS_REG_UNUSED) {
1846 work_group_id_ctrl_words[1] =
1847 pvr_pds_get_constants(&next_constant, 1, &data_size);
1848 }
1849
1850 if ((program->local_input_regs[0] != PVR_PDS_REG_UNUSED) ||
1851 (program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
1852 (program->local_input_regs[2] != PVR_PDS_REG_UNUSED)) {
1853 local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
1854 }
1855
1856 if (program->add_base_workgroup) {
1857 for (uint32_t workgroup_component = 0; workgroup_component < 3;
1858 workgroup_component++) {
1859 if (program->work_group_input_regs[workgroup_component] !=
1860 PVR_PDS_REG_UNUSED) {
1861 program
1862 ->base_workgroup_constant_offset_in_dwords[workgroup_component] =
1863 pvr_pds_get_constants(&next_constant, 1, &data_size);
1864 }
1865 }
1866 }
1867
1868 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
1869 if (program->kick_usc) {
1870 /* Src0 for DOUTU */
1871 pvr_pds_write_wide_constant(buffer,
1872 usc_control_constant64,
1873 program->usc_task_control.src0); /* 64-bit
1874 * Src0.
1875 */
1876 }
1877
1878 if (program->has_coefficient_update_task) {
1879 /* Src0 for DOUTU. */
1880 pvr_pds_write_wide_constant(
1881 buffer,
1882 usc_control_constant64_coeff_update,
1883 program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
1884 }
1885
1886 if ((program->barrier_coefficient != PVR_PDS_REG_UNUSED) ||
1887 (program->clear_pds_barrier) ||
1888 (program->kick_usc && program->conditional_render)) {
1889 pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
1890 * Src0
1891 */
1892 }
1893
1894 if (program->barrier_coefficient != PVR_PDS_REG_UNUSED) {
1895 if (PVR_HAS_QUIRK(dev_info, 51210)) {
1896 /* Write the constant for the coefficient register write. */
1897 doutw = pvr_pds_encode_doutw_src1(
1898 program->barrier_coefficient + 4,
1899 PVR_PDS_DOUTW_LOWER64,
1900 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1901 true,
1902 dev_info);
1903 pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
1904 }
1905 /* Write the constant for the coefficient register write. */
1906 doutw = pvr_pds_encode_doutw_src1(
1907 program->barrier_coefficient,
1908 PVR_PDS_DOUTW_LOWER64,
1909 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1910 true,
1911 dev_info);
1912
1913 /* Check whether the barrier is going to be the last DOUTW done by
1914 * the coefficient sync task.
1915 */
1916 if ((program->work_group_input_regs[0] == PVR_PDS_REG_UNUSED) &&
1917 (program->work_group_input_regs[1] == PVR_PDS_REG_UNUSED) &&
1918 (program->work_group_input_regs[2] == PVR_PDS_REG_UNUSED)) {
1919 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1920 }
1921
1922 pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
1923 }
1924
1925 /* If we want work-group id X, see if we also want work-group id Y. */
1926 if (program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED &&
1927 program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED) {
1928 /* Make sure we are going to DOUTW them into adjacent registers
1929 * otherwise we can't do it in one.
1930 */
1931 assert(program->work_group_input_regs[1] ==
1932 (program->work_group_input_regs[0] + 1));
1933
1934 doutw = pvr_pds_encode_doutw_src1(
1935 program->work_group_input_regs[0],
1936 PVR_PDS_DOUTW_LOWER64,
1937 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1938 true,
1939 dev_info);
1940
1941 /* If we don't want the Z work-group id then this is the last one.
1942 */
1943 if (program->work_group_input_regs[2] == PVR_PDS_REG_UNUSED)
1944 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1945
1946 pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
1947 }
1948 /* If we only want one of X or Y then handle them separately. */
1949 else {
1950 if (program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED) {
1951 doutw = pvr_pds_encode_doutw_src1(
1952 program->work_group_input_regs[0],
1953 PVR_PDS_DOUTW_LOWER32,
1954 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1955 true,
1956 dev_info);
1957
1958 /* If we don't want the Z work-group id then this is the last
1959 * one.
1960 */
1961 if (program->work_group_input_regs[2] == PVR_PDS_REG_UNUSED)
1962 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1963
1964 pvr_pds_write_constant32(buffer,
1965 work_group_id_ctrl_words[0],
1966 doutw);
1967 } else if (program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED) {
1968 doutw = pvr_pds_encode_doutw_src1(
1969 program->work_group_input_regs[1],
1970 PVR_PDS_DOUTW_UPPER32,
1971 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
1972 true,
1973 dev_info);
1974
1975 /* If we don't want the Z work-group id then this is the last
1976 * one.
1977 */
1978 if (program->work_group_input_regs[2] == PVR_PDS_REG_UNUSED)
1979 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
1980
1981 pvr_pds_write_constant32(buffer,
1982 work_group_id_ctrl_words[0],
1983 doutw);
1984 }
1985 }
1986
1987 /* Handle work-group id Z. */
1988 if (program->work_group_input_regs[2] != PVR_PDS_REG_UNUSED) {
1989 doutw = pvr_pds_encode_doutw_src1(
1990 program->work_group_input_regs[2],
1991 PVR_PDS_DOUTW_UPPER32,
1992 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
1993 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
1994 true,
1995 dev_info);
1996
1997 pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
1998 }
1999
2000 /* Handle the local IDs. */
2001 if ((program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
2002 (program->local_input_regs[2] != PVR_PDS_REG_UNUSED)) {
2003 uint32_t dest_reg;
2004
2005 /* If we want local id Y and Z make sure the compiler wants them in
2006 * the same register.
2007 */
2008 if (!program->flattened_work_groups) {
2009 if ((program->local_input_regs[1] != PVR_PDS_REG_UNUSED) &&
2010 (program->local_input_regs[2] != PVR_PDS_REG_UNUSED)) {
2011 assert(program->local_input_regs[1] ==
2012 program->local_input_regs[2]);
2013 }
2014 }
2015
2016 if (program->local_input_regs[1] != PVR_PDS_REG_UNUSED)
2017 dest_reg = program->local_input_regs[1];
2018 else
2019 dest_reg = program->local_input_regs[2];
2020
2021 /* If we want local id X and (Y or Z) then we can do that in a
2022 * single 64-bit DOUTW.
2023 */
2024 if (program->local_input_regs[0] != PVR_PDS_REG_UNUSED) {
2025 assert(dest_reg == (program->local_input_regs[0] + 1));
2026
2027 doutw = pvr_pds_encode_doutw_src1(
2028 program->local_input_regs[0],
2029 PVR_PDS_DOUTW_LOWER64,
2030 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2031 true,
2032 dev_info);
2033
2034 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2035
2036 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2037 }
2038 /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
2039 */
2040 else {
2041 doutw = pvr_pds_encode_doutw_src1(
2042 dest_reg,
2043 PVR_PDS_DOUTW_UPPER32,
2044 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
2045 true,
2046 dev_info);
2047
2048 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
2049
2050 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2051 }
2052 }
2053 /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
2054 */
2055 else if (program->local_input_regs[0] != PVR_PDS_REG_UNUSED) {
2056 doutw = pvr_pds_encode_doutw_src1(
2057 program->local_input_regs[0],
2058 PVR_PDS_DOUTW_LOWER32,
2059 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
2060 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
2061 true,
2062 dev_info);
2063
2064 pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
2065 }
2066 }
2067
2068 if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
2069 gen_mode == PDS_GENERATE_SIZES) {
2070 const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
2071 #define APPEND(X) \
2072 if (encode) { \
2073 *buffer = X; \
2074 buffer++; \
2075 } else { \
2076 code_size += sizeof(uint32_t); \
2077 }
2078
2079 /* Assert that coeff_update_task_branch_size is > 0 because if it is 0
2080 * then we will be doing an infinite loop.
2081 */
2082 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
2083 assert(program->coeff_update_task_branch_size > 0);
2084
2085 /* Test whether this is the coefficient update task or not. */
2086 APPEND(
2087 pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
2088 PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
2089 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
2090 program->coeff_update_task_branch_size /* ADDR */));
2091
2092 /* Do we need to initialize the barrier coefficient? */
2093 if (program->barrier_coefficient != PVR_PDS_REG_UNUSED) {
2094 if (PVR_HAS_QUIRK(dev_info, 51210)) {
2095 /* Initialize the second barrier coefficient registers to zero.
2096 */
2097 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2098 0, /* END */
2099 barrier_ctrl_word2, /* SRC1 */
2100 zero_constant64 >> 1)); /* SRC0 */
2101 }
2102 /* Initialize the coefficient register to zero. */
2103 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2104 0, /* END */
2105 barrier_ctrl_word, /* SRC1 */
2106 zero_constant64 >> 1)); /* SRC0 */
2107 }
2108
2109 if (program->add_base_workgroup) {
2110 const uint32_t temp_values[3] = { 0, 1, 3 };
2111 for (uint32_t workgroup_component = 0; workgroup_component < 3;
2112 workgroup_component++) {
2113 if (program->work_group_input_regs[workgroup_component] ==
2114 PVR_PDS_REG_UNUSED) {
2115 continue;
2116 }
2117
2118 APPEND(pvr_pds_inst_encode_add32(
2119 /* cc */ 0x0,
2120 /* ALUM */ 0,
2121 /* SNA */ 0,
2122 /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
2123 program->base_workgroup_constant_offset_in_dwords
2124 [workgroup_component],
2125 /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
2126 PVR_PDS_CDM_WORK_GROUP_ID_X +
2127 temp_values[workgroup_component],
2128 /* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
2129 PVR_PDS_CDM_WORK_GROUP_ID_X +
2130 temp_values[workgroup_component]));
2131 }
2132 }
2133
2134 /* If we are going to put the work-group IDs in coefficients then we
2135 * just need to do the DOUTWs.
2136 */
2137 if ((program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED) ||
2138 (program->work_group_input_regs[1] != PVR_PDS_REG_UNUSED)) {
2139 uint32_t dest_reg;
2140
2141 if (program->work_group_input_regs[0] != PVR_PDS_REG_UNUSED)
2142 dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
2143 else
2144 dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
2145
2146 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2147 0, /* END */
2148 work_group_id_ctrl_words[0], /* SRC1
2149 */
2150 dest_reg >> 1)); /* SRC0 */
2151 }
2152
2153 if (program->work_group_input_regs[2] != PVR_PDS_REG_UNUSED) {
2154 APPEND(pvr_pds_encode_doutw64(
2155 0, /* cc */
2156 0, /* END */
2157 work_group_id_ctrl_words[1], /* SRC1 */
2158 (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
2159 1)); /* SRC0 */
2160 }
2161
2162 /* Issue the task to the USC. */
2163 if (program->kick_usc && program->has_coefficient_update_task) {
2164 APPEND(pvr_pds_encode_doutu(0, /* cc */
2165 1, /* END */
2166 usc_control_constant64_coeff_update >>
2167 1)); /* SRC0; DOUTU 64-bit Src0 */
2168 }
2169
2170 /* Encode a HALT */
2171 APPEND(pvr_pds_inst_encode_halt(0));
2172
2173 /* Set the branch size used to skip the coefficient sync task. */
2174 program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
2175
2176 /* DOUTW in the local IDs. */
2177
2178 /* If we want X and Y or Z, we only need one DOUTW. */
2179 if ((program->local_input_regs[0] != PVR_PDS_REG_UNUSED) &&
2180 ((program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
2181 (program->local_input_regs[2] != PVR_PDS_REG_UNUSED))) {
2182 local_input_register =
2183 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2184 } else {
2185 /* If we just want X. */
2186 if (program->local_input_regs[0] != PVR_PDS_REG_UNUSED) {
2187 local_input_register =
2188 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
2189 }
2190 /* If we just want Y or Z. */
2191 else if (program->local_input_regs[1] != PVR_PDS_REG_UNUSED ||
2192 program->local_input_regs[2] != PVR_PDS_REG_UNUSED) {
2193 local_input_register =
2194 PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
2195 }
2196 }
2197
2198 if ((program->local_input_regs[0] != PVR_PDS_REG_UNUSED) ||
2199 (program->local_input_regs[1] != PVR_PDS_REG_UNUSED) ||
2200 (program->local_input_regs[2] != PVR_PDS_REG_UNUSED)) {
2201 APPEND(pvr_pds_encode_doutw64(0, /* cc */
2202 0, /* END */
2203 local_id_ctrl_word, /* SRC1 */
2204 local_input_register >> 1)); /* SRC0
2205 */
2206 }
2207
2208 if (program->clear_pds_barrier) {
2209 /* Zero the persistent temp (SW fence for context switch). */
2210 APPEND(pvr_pds_inst_encode_add64(
2211 0, /* cc */
2212 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2213 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2214 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2215 (zero_constant64 >> 1), /* src0 = 0 */
2216 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2217 (zero_constant64 >> 1), /* src1 = 0 */
2218 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
2219 * ptemp64[0]
2220 */
2221 }
2222
2223 /* If this is a fence, issue the DOUTC. */
2224 if (program->fence) {
2225 APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
2226 0 /* END */));
2227 }
2228
2229 if (program->kick_usc) {
2230 if (program->conditional_render) {
2231 /* Skip if coefficient update task. */
2232 APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
2233 0,
2234 PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2235 16));
2236
2237 /* Load the predicate. */
2238 APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
2239
2240 /* Load negate constant into temp for CMP. */
2241 APPEND(pvr_pds_inst_encode_add64(
2242 0, /* cc */
2243 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2244 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2245 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2246 (cond_render_negate_constant >> 1), /* src0 = 0 */
2247 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2248 (zero_constant64 >> 1), /* src1 = 0 */
2249 PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
2250 (cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
2251 */
2252
2253 APPEND(pvr_pds_inst_encode_wdf(0));
2254
2255 for (uint32_t i = 0; i < 4; i++) {
2256 APPEND(pvr_pds_inst_encode_sftlp32(
2257 1, /* enable immediate */
2258 0, /* cc */
2259 PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
2260 cond_render_pred_temp + i, /* SRC0 */
2261 cond_render_pred_mask_constant + i, /* SRC1 */
2262 0, /* SRC2 (Shift) */
2263 cond_render_pred_temp + i)); /* DST */
2264
2265 APPEND(
2266 pvr_pds_inst_encode_sftlp32(1, /* enable immediate */
2267 0, /* cc */
2268 PVR_ROGUE_PDSINST_LOP_OR, /* LOP
2269 */
2270 cond_render_pred_temp + i, /* SRC0
2271 */
2272 cond_render_pred_temp, /* SRC1 */
2273 0, /* SRC2 (Shift) */
2274 cond_render_pred_temp)); /* DST */
2275 }
2276
2277 APPEND(pvr_pds_inst_encode_limm(0, /* cc */
2278 cond_render_pred_temp + 1, /* SRC1
2279 */
2280 0, /* SRC0 */
2281 0)); /* GLOBALREG */
2282
2283 APPEND(pvr_pds_inst_encode_sftlp32(1, /* enable immediate */
2284 0, /* cc */
2285 PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
2286 */
2287 cond_render_pred_temp, /* SRC0 */
2288 cond_render_negate_temp, /* SRC1
2289 */
2290 0, /* SRC2 (Shift) */
2291 cond_render_pred_temp)); /* DST
2292 */
2293
2294 /* Check that the predicate is 0. */
2295 APPEND(pvr_pds_inst_encode_cmpi(
2296 0, /* cc */
2297 PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
2298 (cond_render_pred_temp >> 1) +
2299 PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
2300 0)); /* SRC1 */
2301
2302 /* If predicate is 0, skip DOUTU. */
2303 APPEND(pvr_pds_inst_encode_bra(
2304 PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
2305 P0 */
2306 0, /* NEG */
2307 PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
2308 keep
2309 */
2310 2));
2311 }
2312
2313 /* Issue the task to the USC.
2314 * DoutU src1=USC Code Base address, src2=doutu word 2.
2315 */
2316 APPEND(pvr_pds_encode_doutu(1, /* cc */
2317 1, /* END */
2318 usc_control_constant64 >> 1)); /* SRC0;
2319 * DOUTU
2320 * 64-bit
2321 * Src0.
2322 */
2323 }
2324
2325 /* End the program if the Dout did not already end it. */
2326 APPEND(pvr_pds_inst_encode_halt(0));
2327 #undef APPEND
2328 }
2329
2330 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2331 /* Set the data segment pointer and ensure we return 1 past the buffer
2332 * ptr.
2333 */
2334 program->data_segment = buffer;
2335
2336 buffer += next_constant;
2337 }
2338
2339 /* Require at least one DWORD of PDS data so the program runs. */
2340 data_size = MAX2(1, data_size);
2341
2342 program->temps_used = temps_used;
2343 program->highest_temp = temps_used;
2344 program->data_size = data_size;
2345 if (gen_mode == PDS_GENERATE_SIZES)
2346 program->code_size = code_size;
2347
2348 return buffer;
2349 }
2350
2351 /**
2352 * Generates the PDS vertex shader data or code block. This program will do a
2353 * DMA into USC Constants followed by a DOUTU.
2354 *
2355 * \param program Pointer to the PDS vertex shader program.
2356 * \param buffer Pointer to the buffer for the program.
2357 * \param gen_mode Generate code or data.
2358 * \param dev_info PVR device information struct.
2359 * \returns Pointer to just beyond the code/data.
2360 */
pvr_pds_vertex_shader_sa(struct pvr_pds_vertex_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)2361 uint32_t *pvr_pds_vertex_shader_sa(
2362 struct pvr_pds_vertex_shader_sa_program *restrict program,
2363 uint32_t *restrict buffer,
2364 enum pvr_pds_generate_mode gen_mode,
2365 const struct pvr_device_info *dev_info)
2366 {
2367 uint32_t next_constant;
2368 uint32_t data_size = 0;
2369 uint32_t code_size = 0;
2370
2371 uint32_t usc_control_constant64 = 0;
2372 uint32_t dma_address_constant64 = 0;
2373 uint32_t dma_control_constant32 = 0;
2374 uint32_t doutw_value_constant64 = 0;
2375 uint32_t doutw_control_constant32 = 0;
2376 uint32_t fence_constant_word = 0;
2377 uint32_t *buffer_base;
2378 uint32_t kick_index;
2379
2380 uint32_t total_num_doutw =
2381 program->num_dword_doutw + program->num_q_word_doutw;
2382 uint32_t total_size_dma =
2383 program->num_dword_doutw + 2 * program->num_q_word_doutw;
2384
2385 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2386
2387 /* Copy the DMA control words and USC task control words to constants.
2388 *
2389 * Arrange them so that the 64-bit words are together followed by the 32-bit
2390 * words.
2391 */
2392 if (program->kick_usc) {
2393 usc_control_constant64 =
2394 pvr_pds_get_constants(&next_constant, 2, &data_size);
2395 }
2396
2397 if (program->clear_pds_barrier) {
2398 fence_constant_word =
2399 pvr_pds_get_constants(&next_constant, 2, &data_size);
2400 }
2401 dma_address_constant64 = pvr_pds_get_constants(&next_constant,
2402 2 * program->num_dma_kicks,
2403 &data_size);
2404
2405 /* Assign all unaligned constants together to avoid alignment issues caused
2406 * by pvr_pds_get_constants with even allocation sizes.
2407 */
2408 doutw_value_constant64 = pvr_pds_get_constants(
2409 &next_constant,
2410 total_size_dma + total_num_doutw + program->num_dma_kicks,
2411 &data_size);
2412 doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
2413 dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
2414
2415 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
2416 buffer_base = buffer;
2417
2418 if (program->kick_usc) {
2419 /* Src0 for DOUTU. */
2420 pvr_pds_write_wide_constant(buffer_base,
2421 usc_control_constant64,
2422 program->usc_task_control.src0); /* DOUTU
2423 * 64-bit
2424 * Src0.
2425 */
2426 buffer += 2;
2427 }
2428
2429 if (program->clear_pds_barrier) {
2430 /* Encode the fence constant src0. Fence barrier is initialized to
2431 * zero.
2432 */
2433 pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
2434 buffer += 2;
2435 }
2436
2437 if (total_num_doutw > 0) {
2438 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2439 /* Write the constant for the coefficient register write. */
2440 pvr_pds_write_constant64(buffer_base,
2441 doutw_value_constant64,
2442 program->q_word_doutw_value[2 * i],
2443 program->q_word_doutw_value[2 * i + 1]);
2444 pvr_pds_write_constant32(
2445 buffer_base,
2446 doutw_control_constant32,
2447 program->q_word_doutw_control[i] |
2448 ((!program->num_dma_kicks && i == total_num_doutw - 1)
2449 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2450 : 0));
2451
2452 doutw_value_constant64 += 2;
2453 doutw_control_constant32 += 1;
2454 }
2455
2456 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2457 /* Write the constant for the coefficient register write. */
2458 pvr_pds_write_constant32(buffer_base,
2459 doutw_value_constant64,
2460 program->dword_doutw_value[i]);
2461 pvr_pds_write_constant32(
2462 buffer_base,
2463 doutw_control_constant32,
2464 program->dword_doutw_control[i] |
2465 ((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
2466 ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
2467 : 0));
2468
2469 doutw_value_constant64 += 1;
2470 doutw_control_constant32 += 1;
2471 }
2472
2473 buffer += total_size_dma + total_num_doutw;
2474 }
2475
2476 if (program->num_dma_kicks == 1) /* Most-common case. */
2477 {
2478 /* Src0 for DOUTD - Address. */
2479 pvr_pds_write_dma_address(buffer_base,
2480 dma_address_constant64,
2481 program->dma_address[0],
2482 false,
2483 dev_info);
2484
2485 /* Src1 for DOUTD - Control Word. */
2486 pvr_pds_write_constant32(
2487 buffer_base,
2488 dma_control_constant32,
2489 program->dma_control[0] |
2490 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2491
2492 /* Move the buffer ptr along as we will return 1 past the buffer. */
2493 buffer += 3;
2494 } else if (program->num_dma_kicks > 1) {
2495 for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
2496 kick_index++) {
2497 /* Src0 for DOUTD - Address. */
2498 pvr_pds_write_dma_address(buffer_base,
2499 dma_address_constant64,
2500 program->dma_address[kick_index],
2501 false,
2502 dev_info);
2503
2504 /* Src1 for DOUTD - Control Word. */
2505 pvr_pds_write_constant32(buffer_base,
2506 dma_control_constant32,
2507 program->dma_control[kick_index]);
2508 dma_address_constant64 += 2;
2509 dma_control_constant32 += 1;
2510 }
2511
2512 /* Src0 for DOUTD - Address. */
2513 pvr_pds_write_dma_address(buffer_base,
2514 dma_address_constant64,
2515 program->dma_address[kick_index],
2516 false,
2517 dev_info);
2518
2519 /* Src1 for DOUTD - Control Word. */
2520 pvr_pds_write_constant32(
2521 buffer_base,
2522 dma_control_constant32,
2523 program->dma_control[kick_index] |
2524 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
2525
2526 buffer += 3 * program->num_dma_kicks;
2527 }
2528 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2529 if (program->clear_pds_barrier) {
2530 /* Zero the persistent temp (SW fence for context switch). */
2531 *buffer++ = pvr_pds_inst_encode_add64(
2532 0, /* cc */
2533 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
2534 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
2535 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2536 (fence_constant_word >> 1), /* src0 = 0 */
2537 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
2538 (fence_constant_word >> 1), /* src1 = 0 */
2539 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
2540 * ptemp[0]
2541 */
2542 }
2543
2544 if (total_num_doutw > 0) {
2545 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2546 /* Set the coefficient register to data value. */
2547 *buffer++ = pvr_pds_encode_doutw64(
2548 /* cc */ 0,
2549 /* END */ !program->num_dma_kicks && !program->kick_usc &&
2550 (i == total_num_doutw - 1),
2551 /* SRC1 */ doutw_control_constant32,
2552 /* SRC0 */ doutw_value_constant64 >> 1);
2553
2554 doutw_value_constant64 += 2;
2555 doutw_control_constant32 += 1;
2556 }
2557
2558 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2559 /* Set the coefficient register to data value. */
2560 *buffer++ = pvr_pds_encode_doutw64(
2561 /* cc */ 0,
2562 /* END */ !program->num_dma_kicks && !program->kick_usc &&
2563 (i == program->num_dword_doutw - 1),
2564 /* SRC1 */ doutw_control_constant32,
2565 /* SRC0 */ doutw_value_constant64 >> 1);
2566
2567 doutw_value_constant64 += 1;
2568 doutw_control_constant32 += 1;
2569 }
2570 }
2571
2572 if (program->num_dma_kicks != 0) {
2573 /* DMA the state into the secondary attributes. */
2574
2575 if (program->num_dma_kicks == 1) /* Most-common case. */
2576 {
2577 *buffer++ = pvr_pds_encode_doutd(
2578 /* cc */ 0,
2579 /* END */ !program->kick_usc,
2580 /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
2581 /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
2582 * Src0.
2583 */
2584 } else {
2585 for (kick_index = 0; kick_index < program->num_dma_kicks;
2586 kick_index++) {
2587 *buffer++ = pvr_pds_encode_doutd(
2588 /* cc */ 0,
2589 /* END */ (!program->kick_usc) &&
2590 (kick_index + 1 == program->num_dma_kicks),
2591 /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
2592 * Src1.
2593 */
2594 /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
2595 * 64-bit
2596 * Src0.
2597 */
2598 dma_address_constant64 += 2;
2599 dma_control_constant32 += 1;
2600 }
2601 }
2602 }
2603
2604 if (program->kick_usc) {
2605 /* Kick the USC. */
2606 *buffer++ = pvr_pds_encode_doutu(
2607 /* cc */ 0,
2608 /* END */ 1,
2609 /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
2610 */
2611 }
2612
2613 if (!program->kick_usc && program->num_dma_kicks == 0 &&
2614 total_num_doutw == 0) {
2615 *buffer++ = pvr_pds_inst_encode_halt(0);
2616 }
2617 }
2618
2619 code_size = program->num_dma_kicks + total_num_doutw;
2620 if (program->clear_pds_barrier)
2621 code_size++; /* ADD64 instruction. */
2622
2623 if (program->kick_usc)
2624 code_size++;
2625
2626 /* If there are no DMAs and no USC kick then code is HALT only. */
2627 if (code_size == 0)
2628 code_size = 1;
2629
2630 program->data_size = data_size;
2631 program->code_size = code_size;
2632
2633 return buffer;
2634 }
2635
2636 /**
2637 * Writes the Uniform Data block for the PDS pixel shader secondary attributes
2638 * program.
2639 *
2640 * \param program Pointer to the PDS pixel shader secondary attributes program.
2641 * \param buffer Pointer to the buffer for the code/data.
2642 * \param gen_mode Either code or data can be generated or sizes only updated.
2643 * \returns Pointer to just beyond the buffer for the program/data.
2644 */
pvr_pds_pixel_shader_uniform_texture_code(struct pvr_pds_pixel_shader_sa_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)2645 uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
2646 struct pvr_pds_pixel_shader_sa_program *restrict program,
2647 uint32_t *restrict buffer,
2648 enum pvr_pds_generate_mode gen_mode)
2649 {
2650 uint32_t *instruction;
2651 uint32_t code_size = 0;
2652 uint32_t data_size = 0;
2653 uint32_t temps_used = 0;
2654 uint32_t next_constant;
2655
2656 assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
2657 0);
2658
2659 assert((gen_mode == PDS_GENERATE_CODE_SEGMENT && buffer) ||
2660 gen_mode == PDS_GENERATE_SIZES);
2661
2662 /* clang-format off */
2663 /* Shape of code segment (note: clear is different)
2664 *
2665 * Code
2666 * +------------+
2667 * | BRA if0 |
2668 * | DOUTD |
2669 * | ... |
2670 * | DOUTD.halt |
2671 * | uniform |
2672 * | DOUTD |
2673 * | ... |
2674 * | ... |
2675 * | DOUTW |
2676 * | ... |
2677 * | ... |
2678 * | DOUTU.halt |
2679 * | HALT |
2680 * +------------+
2681 */
2682 /* clang-format on */
2683 instruction = buffer;
2684
2685 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
2686
2687 /* The clear color can arrive packed in the right form in the first (or
2688 * first 2) dwords of the shared registers and the program will issue a
2689 * single doutw for this.
2690 */
2691 if (program->clear && program->packed_clear) {
2692 uint32_t color_constant1 =
2693 pvr_pds_get_constants(&next_constant, 2, &data_size);
2694
2695 uint32_t control_word_constant1 =
2696 pvr_pds_get_constants(&next_constant, 2, &data_size);
2697
2698 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2699 /* DOUTW the clear color to the USC constants. Predicate with
2700 * uniform loading flag (IF0).
2701 */
2702 *instruction++ = pvr_pds_encode_doutw64(
2703 /* cc */ 1, /* Only for uniform loading program. */
2704 /* END */ program->kick_usc ? 0 : 1, /* Last
2705 * instruction
2706 * for a clear.
2707 */
2708 /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2709 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2710
2711 code_size += 1;
2712 }
2713 } else if (program->clear) {
2714 uint32_t color_constant1, color_constant2;
2715
2716 if (program->clear_color_dest_reg & 0x1) {
2717 uint32_t color_constant3, control_word_constant1,
2718 control_word_constant2, color_constant4;
2719
2720 color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2721 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2722 color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
2723
2724 control_word_constant1 =
2725 pvr_pds_get_constants(&next_constant, 2, &data_size);
2726 control_word_constant2 =
2727 pvr_pds_get_constants(&next_constant, 2, &data_size);
2728 color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2729
2730 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2731 /* DOUTW the clear color to the USSE constants. Predicate with
2732 * uniform loading flag (IF0).
2733 */
2734 *instruction++ = pvr_pds_encode_doutw64(
2735 /* cc */ 1, /* Only for Uniform Loading program */
2736 /* END */ 0,
2737 /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
2738 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2739
2740 *instruction++ = pvr_pds_encode_doutw64(
2741 /* cc */ 1, /* Only for Uniform Loading program */
2742 /* END */ 0,
2743 /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
2744 /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2745
2746 *instruction++ = pvr_pds_encode_doutw64(
2747 /* cc */ 1, /* Only for uniform loading program */
2748 /* END */ program->kick_usc ? 0 : 1, /* Last
2749 * instruction
2750 * for a clear.
2751 */
2752 /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
2753 /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
2754 }
2755
2756 code_size += 3;
2757 } else {
2758 uint32_t control_word_constant, control_word_last_constant;
2759
2760 /* Put the clear color and control words into the first 8
2761 * constants.
2762 */
2763 color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2764 color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
2765 control_word_constant =
2766 pvr_pds_get_constants(&next_constant, 2, &data_size);
2767 control_word_last_constant =
2768 pvr_pds_get_constants(&next_constant, 2, &data_size);
2769
2770 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2771 /* DOUTW the clear color to the USSE constants. Predicate with
2772 * uniform loading flag (IF0).
2773 */
2774 *instruction++ = pvr_pds_encode_doutw64(
2775 /* cc */ 1, /* Only for Uniform Loading program */
2776 /* END */ 0,
2777 /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
2778 /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
2779
2780 *instruction++ = pvr_pds_encode_doutw64(
2781 /* cc */ 1, /* Only for uniform loading program */
2782 /* END */ program->kick_usc ? 0 : 1, /* Last
2783 * instruction
2784 * for a clear.
2785 */
2786 /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1
2787 */
2788 /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
2789 }
2790
2791 code_size += 2;
2792 }
2793
2794 if (program->kick_usc) {
2795 uint32_t doutu_constant64;
2796
2797 doutu_constant64 =
2798 pvr_pds_get_constants(&next_constant, 2, &data_size);
2799
2800 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2801 /* Issue the task to the USC.
2802 *
2803 * dout ds1[constant_use], ds0[constant_use],
2804 * ds1[constant_use], emit
2805 */
2806 *instruction++ = pvr_pds_encode_doutu(
2807 /* cc */ 0,
2808 /* END */ 1,
2809 /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0
2810 */
2811 }
2812
2813 code_size += 1;
2814 }
2815
2816 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2817 /* End the program. */
2818 *instruction++ = pvr_pds_inst_encode_halt(0);
2819 }
2820 code_size += 1;
2821 } else {
2822 uint32_t total_num_doutw =
2823 program->num_dword_doutw + program->num_q_word_doutw;
2824 bool both_textures_and_uniforms =
2825 ((program->num_texture_dma_kicks > 0) &&
2826 ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
2827 program->kick_usc));
2828 uint32_t doutu_constant64 = 0;
2829
2830 if (both_textures_and_uniforms) {
2831 /* If the size of a PDS data section is 0, the hardware won't run
2832 * it. We therefore don't need to branch when there is only a
2833 * texture OR a uniform update program.
2834 */
2835 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2836 uint32_t branch_address =
2837 MAX2(1 + program->num_texture_dma_kicks, 2);
2838
2839 /* Use If0 to BRAnch to uniform code. */
2840 *instruction++ = pvr_pds_encode_bra(
2841 /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
2842 /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
2843 /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
2844 /* ADDR */ branch_address);
2845 }
2846
2847 code_size += 1;
2848 }
2849
2850 if (program->num_texture_dma_kicks > 0) {
2851 uint32_t dma_address_constant64;
2852 uint32_t dma_control_constant32;
2853 /* Allocate 3 constant spaces for each kick. The 64-bit constants
2854 * come first followed by the 32-bit constants.
2855 */
2856 dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
2857 dma_control_constant32 =
2858 dma_address_constant64 + (program->num_texture_dma_kicks * 2);
2859
2860 for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
2861 code_size += 1;
2862 if (gen_mode != PDS_GENERATE_CODE_SEGMENT)
2863 continue;
2864
2865 /* DMA the state into the secondary attributes. */
2866 *instruction++ = pvr_pds_encode_doutd(
2867 /* cc */ 0,
2868 /* END */ dma == (program->num_texture_dma_kicks - 1),
2869 /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
2870 /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2871 * 64-bit
2872 * Src0
2873 */
2874 dma_address_constant64 += 2;
2875 dma_control_constant32 += 1;
2876 }
2877 } else if (both_textures_and_uniforms) {
2878 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2879 /* End the program. */
2880 *instruction++ = pvr_pds_inst_encode_halt(0);
2881 }
2882
2883 code_size += 1;
2884 }
2885
2886 /* Reserve space at the beginning of the data segment for the DOUTU Task
2887 * Control if one is needed.
2888 */
2889 if (program->kick_usc) {
2890 doutu_constant64 =
2891 pvr_pds_get_constants(&next_constant, 2, &data_size);
2892 }
2893
2894 /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
2895 * 64-bit constants come first followed by the 32-bit constants.
2896 */
2897 uint32_t total_size_dma =
2898 program->num_dword_doutw + 2 * program->num_q_word_doutw;
2899
2900 uint32_t dma_address_constant64 = pvr_pds_get_constants(
2901 &next_constant,
2902 program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
2903 &data_size);
2904 uint32_t doutw_value_constant64 =
2905 dma_address_constant64 + program->num_uniform_dma_kicks * 2;
2906 uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
2907 uint32_t doutw_control_constant32 =
2908 dma_control_constant32 + program->num_uniform_dma_kicks;
2909
2910 if (total_num_doutw > 0) {
2911 pvr_pds_get_constants(&next_constant, 0, &data_size);
2912
2913 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2914 for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
2915 /* Set the coefficient register to data value. */
2916 *instruction++ = pvr_pds_encode_doutw64(
2917 /* cc */ 0,
2918 /* END */ !program->num_uniform_dma_kicks &&
2919 !program->kick_usc && (i == total_num_doutw - 1),
2920 /* SRC1 */ doutw_control_constant32,
2921 /* SRC0 */ doutw_value_constant64 >> 1);
2922
2923 doutw_value_constant64 += 2;
2924 doutw_control_constant32 += 1;
2925 }
2926
2927 for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
2928 /* Set the coefficient register to data value. */
2929 *instruction++ = pvr_pds_encode_doutw64(
2930 /* cc */ 0,
2931 /* END */ !program->num_uniform_dma_kicks &&
2932 !program->kick_usc && (i == program->num_dword_doutw - 1),
2933 /* SRC1 */ doutw_control_constant32,
2934 /* SRC0 */ doutw_value_constant64 >> 1);
2935
2936 doutw_value_constant64 += 1;
2937 doutw_control_constant32 += 1;
2938 }
2939 }
2940 code_size += total_num_doutw;
2941 }
2942
2943 if (program->num_uniform_dma_kicks > 0) {
2944 for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
2945 code_size += 1;
2946
2947 if (gen_mode != PDS_GENERATE_CODE_SEGMENT)
2948 continue;
2949
2950 bool last_instruction = false;
2951 if (!program->kick_usc &&
2952 (dma == program->num_uniform_dma_kicks - 1)) {
2953 last_instruction = true;
2954 }
2955 /* DMA the state into the secondary attributes. */
2956 *instruction++ = pvr_pds_encode_doutd(
2957 /* cc */ 0,
2958 /* END */ last_instruction,
2959 /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1
2960 */
2961 /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
2962 * 64-bit
2963 * Src0
2964 */
2965 dma_address_constant64 += 2;
2966 dma_control_constant32 += 1;
2967 }
2968 }
2969
2970 if (program->kick_usc) {
2971 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2972 /* Issue the task to the USC.
2973 *
2974 * dout ds1[constant_use], ds0[constant_use],
2975 * ds1[constant_use], emit
2976 */
2977
2978 *instruction++ = pvr_pds_encode_doutu(
2979 /* cc */ 0,
2980 /* END */ 1,
2981 /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
2982 }
2983
2984 code_size += 1;
2985 } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
2986 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
2987 /* End the program. */
2988 *instruction++ = pvr_pds_inst_encode_halt(0);
2989 }
2990
2991 code_size += 1;
2992 }
2993 }
2994
2995 /* Minimum temp count is 1. */
2996 program->temps_used = MAX2(temps_used, 1);
2997 program->code_size = code_size;
2998
2999 if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
3000 return instruction;
3001 else
3002 return NULL;
3003 }
3004
/**
 * Writes the Uniform Data block for the PDS pixel shader secondary attributes
 * program.
 *
 * Depending on \p program and \p uniform one of four layouts is produced:
 *  - packed clear  (program->clear && uniform && program->packed_clear),
 *  - unpacked clear (program->clear && uniform),
 *  - uniform data  (uniform): optional DOUTU task control, DOUTW values and
 *    control words, and uniform DMA address/control words,
 *  - texture data  (!uniform): texture DMA address/control words only.
 *
 * Constants are only written to \p buffer when \p gen_mode is
 * PDS_GENERATE_DATA_SEGMENT; in any other accepted mode only the sizes are
 * computed. PDS_GENERATE_CODE_SEGMENT is rejected by an assert.
 *
 * On return program->data_segment points at \p buffer, program->data_size
 * holds the accumulated constant count and program->temps_used is at least 1.
 *
 * \param program Pointer to the PDS pixel shader secondary attributes program.
 * \param buffer Pointer to the buffer for the code/data. Must be aligned to
 *               PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (asserted).
 * \param gen_mode Either code or data can be generated or sizes only updated.
 * \param uniform true to generate the uniform (or clear) data layout, false to
 *                generate the texture DMA data layout.
 * \param dev_info PVR device information struct.
 * \returns Pointer to just beyond the buffer for the program/data when
 *          generating the data segment, NULL otherwise.
 */
uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
   struct pvr_pds_pixel_shader_sa_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   bool uniform,
   const struct pvr_device_info *dev_info)
{
   uint32_t *constants = buffer;
   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
   /* Never modified in this function; only feeds the MAX2() at the end. */
   uint32_t temps_used = 0;
   uint32_t data_size = 0;

   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
          0);

   /* This function only produces data (or sizes), never code. */
   assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);

   /* Shape of data segment (note: clear is different).
    *
    *        Uniform            Texture
    *    +--------------+   +-------------+
    *    | USC Task   L |   | USC Task  L |
    *    |            H |   |           H |
    *    | DMA1 Src0  L |   | DMA1 Src0 L |
    *    |            H |   |           H |
    *    | DMA2 Src0  L |   |             |
    *    |            H |   |             |
    *    | DMA1 Src1    |   | DMA1 Src1   |
    *    | DMA2 Src1    |   |             |
    *    | DOUTW0 Src1  |   |             |
    *    | DOUTW1 Src1  |   |             |
    *    |   ...        |   |             |
    *    | DOUTWn Srcn  |   |             |
    *    | other data   |   |             |
    *    +--------------+   +-------------+
    *
    * NOTE(review): as written below, the uniform path also places the DOUTW
    * Src0 values between the DMA Src0 and DMA Src1 words, and the texture
    * path does not write a USC task word in this function - confirm the
    * diagram against the matching code generator.
    */

   /* Generate the PDS pixel shader secondary attributes data.
    *
    * Packed Clear
    * The clear color can arrive packed in the right form in the first (or
    * first 2) dwords of the shared registers and the program will issue a
    * single DOUTW for this.
    */
   if (program->clear && uniform && program->packed_clear) {
      /* One 64-bit color value followed by one 64-bit slot for its DOUTW
       * control word.
       */
      uint32_t color_constant1 =
         pvr_pds_get_constants(&next_constant, 2, &data_size);

      uint32_t control_word_constant1 =
         pvr_pds_get_constants(&next_constant, 2, &data_size);

      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
         uint32_t doutw;

         pvr_pds_write_constant64(constants,
                                  color_constant1,
                                  program->clear_color[0],
                                  program->clear_color[1]);

         /* Load into first constant in common store. */
         doutw = pvr_pds_encode_doutw_src1(
            program->clear_color_dest_reg,
            PVR_PDS_DOUTW_LOWER64,
            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
            false,
            dev_info);

         /* Set the last flag. */
         doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
         pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
      }
   } else if (program->clear && uniform) {
      /* Unpacked clear: all four clear_color dwords are written out. */
      uint32_t color_constant1, color_constant2;

      if (program->clear_color_dest_reg & 0x1) {
         /* Odd destination register: the color is split into 32-bit, 64-bit
          * and 32-bit pieces, each with its own DOUTW control word
          * (presumably so that the 64-bit write lands on an even register).
          * Note that color_constant4 actually holds the third control word.
          */
         uint32_t color_constant3, control_word_constant1,
            control_word_constant2, color_constant4;

         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);

         control_word_constant1 =
            pvr_pds_get_constants(&next_constant, 2, &data_size);
         control_word_constant2 =
            pvr_pds_get_constants(&next_constant, 2, &data_size);
         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);

         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
            uint32_t doutw;

            pvr_pds_write_constant32(constants,
                                     color_constant1,
                                     program->clear_color[0]);

            pvr_pds_write_constant64(constants,
                                     color_constant2,
                                     program->clear_color[1],
                                     program->clear_color[2]);

            pvr_pds_write_constant32(constants,
                                     color_constant3,
                                     program->clear_color[3]);

            /* Load into first constant in common store. */
            doutw = pvr_pds_encode_doutw_src1(
               program->clear_color_dest_reg,
               PVR_PDS_DOUTW_LOWER32,
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
               false,
               dev_info);

            pvr_pds_write_constant64(constants,
                                     control_word_constant1,
                                     doutw,
                                     0);

            /* Move the destination register along (past the first dword). */
            doutw = pvr_pds_encode_doutw_src1(
               program->clear_color_dest_reg + 1,
               PVR_PDS_DOUTW_LOWER64,
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
               false,
               dev_info);

            pvr_pds_write_constant64(constants,
                                     control_word_constant2,
                                     doutw,
                                     0);

            /* Move the destination register along (past the 64-bit pair). */
            doutw = pvr_pds_encode_doutw_src1(
               program->clear_color_dest_reg + 3,
               PVR_PDS_DOUTW_LOWER32,
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
               false,
               dev_info);

            /* Set the last flag. */
            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
            pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
         }
      } else {
         /* Even destination register: two 64-bit color values, each with a
          * 64-bit slot for its control word.
          */
         uint32_t control_word_constant, control_word_last_constant;

         /* Put the clear color and control words into the first 8
          * constants.
          */
         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
         control_word_constant =
            pvr_pds_get_constants(&next_constant, 2, &data_size);
         control_word_last_constant =
            pvr_pds_get_constants(&next_constant, 2, &data_size);

         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
            uint32_t doutw;
            pvr_pds_write_constant64(constants,
                                     color_constant1,
                                     program->clear_color[0],
                                     program->clear_color[1]);

            pvr_pds_write_constant64(constants,
                                     color_constant2,
                                     program->clear_color[2],
                                     program->clear_color[3]);

            /* Load into first constant in common store. */
            doutw = pvr_pds_encode_doutw_src1(
               program->clear_color_dest_reg,
               PVR_PDS_DOUTW_LOWER64,
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
               false,
               dev_info);

            pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);

            /* Move the destination register along: reuse the first control
             * word, replacing only its AO field with dest_reg + 2.
             */
            doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
            doutw |= (program->clear_color_dest_reg + 2)
                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;

            /* Set the last flag. */
            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
            pvr_pds_write_constant64(constants,
                                     control_word_last_constant,
                                     doutw,
                                     0);
         }
      }

      /* Constants for the DOUTU Task Control, if needed. For the unpacked
       * clear layout these come after the color and control words.
       */
      if (program->kick_usc) {
         uint32_t doutu_constant64 =
            pvr_pds_get_constants(&next_constant, 2, &data_size);

         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
            pvr_pds_write_wide_constant(
               constants,
               doutu_constant64,
               program->usc_task_control.src0); /* 64-bit Src0 */
         }
      }
   } else {
      if (uniform) {
         /* Reserve space at the beginning of the data segment for the DOUTU
          * Task Control if one is needed.
          */
         if (program->kick_usc) {
            uint32_t doutu_constant64 =
               pvr_pds_get_constants(&next_constant, 2, &data_size);

            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
               pvr_pds_write_wide_constant(
                  constants,
                  doutu_constant64,
                  program->usc_task_control.src0); /* 64-bit Src0 */
            }
         }

         /* Number of DOUTW control words (one per DOUTW). */
         uint32_t total_num_doutw =
            program->num_dword_doutw + program->num_q_word_doutw;
         /* Number of dwords of DOUTW value data: one per dword DOUTW and two
          * per qword DOUTW.
          */
         uint32_t total_size_dma =
            program->num_dword_doutw + 2 * program->num_q_word_doutw;

         /* Allocate 3 constant spaces for each kick. The 64-bit constants
          * come first followed by the 32-bit constants.
          *
          * A single allocation is carved up, in order, into:
          *   DMA addresses (2 dwords per uniform DMA kick),
          *   DOUTW values  (total_size_dma dwords),
          *   DMA controls  (1 dword per uniform DMA kick),
          *   DOUTW controls (1 dword per DOUTW).
          */
         uint32_t dma_address_constant64 =
            pvr_pds_get_constants(&next_constant,
                                  program->num_uniform_dma_kicks * 3 +
                                     total_size_dma + total_num_doutw,
                                  &data_size);
         uint32_t doutw_value_constant64 =
            dma_address_constant64 + program->num_uniform_dma_kicks * 2;
         uint32_t dma_control_constant32 =
            doutw_value_constant64 + total_size_dma;
         uint32_t doutw_control_constant32 =
            dma_control_constant32 + program->num_uniform_dma_kicks;

         if (total_num_doutw > 0) {
            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
               /* Qword DOUTWs are written first, then the dword DOUTWs. */
               for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
                  pvr_pds_write_constant64(
                     constants,
                     doutw_value_constant64,
                     program->q_word_doutw_value[2 * i],
                     program->q_word_doutw_value[2 * i + 1]);
                  /* LAST is only set when no DMA kicks follow. Since
                   * i < num_q_word_doutw, the comparison against
                   * total_num_doutw - 1 can only be true when there are no
                   * dword DOUTWs and this is the final qword DOUTW.
                   */
                  pvr_pds_write_constant32(
                     constants,
                     doutw_control_constant32,
                     program->q_word_doutw_control[i] |
                        ((!program->num_uniform_dma_kicks &&
                          i == total_num_doutw - 1)
                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
                            : 0));

                  doutw_value_constant64 += 2;
                  doutw_control_constant32 += 1;
               }

               for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
                  pvr_pds_write_constant32(constants,
                                           doutw_value_constant64,
                                           program->dword_doutw_value[i]);
                  /* LAST goes on the final dword DOUTW, again only when no
                   * DMA kicks follow.
                   */
                  pvr_pds_write_constant32(
                     constants,
                     doutw_control_constant32,
                     program->dword_doutw_control[i] |
                        ((!program->num_uniform_dma_kicks &&
                          i == program->num_dword_doutw - 1)
                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
                            : 0));

                  doutw_value_constant64 += 1;
                  doutw_control_constant32 += 1;
               }
            }
         }

         if (program->num_uniform_dma_kicks > 0) {
            uint32_t kick;

            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
               /* All kicks except the final one are written without the
                * LAST flag.
                */
               for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
                    kick++) {
                  /* Copy the dma control words to constants. */
                  pvr_pds_write_dma_address(constants,
                                            dma_address_constant64,
                                            program->uniform_dma_address[kick],
                                            false,
                                            dev_info);
                  pvr_pds_write_constant32(constants,
                                           dma_control_constant32,
                                           program->uniform_dma_control[kick]);

                  dma_address_constant64 += 2;
                  dma_control_constant32 += 1;
               }

               /* Final kick (kick == num_uniform_dma_kicks - 1 here): same
                * as above but with the LAST flag set in the control word.
                */
               pvr_pds_write_dma_address(constants,
                                         dma_address_constant64,
                                         program->uniform_dma_address[kick],
                                         false,
                                         dev_info);
               pvr_pds_write_constant32(
                  constants,
                  dma_control_constant32,
                  program->uniform_dma_control[kick] |
                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
            }
         }

      } else if (program->num_texture_dma_kicks > 0) {
         /* Allocate 3 constant spaces for each kick. The 64-bit constants
          * come first followed by the 32-bit constants.
          */
         uint32_t dma_address_constant64 =
            pvr_pds_get_constants(&next_constant,
                                  program->num_texture_dma_kicks * 3,
                                  &data_size);
         uint32_t dma_control_constant32 =
            dma_address_constant64 + (program->num_texture_dma_kicks * 2);

         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
            uint32_t kick;
            /* All kicks except the final one are written without the LAST
             * flag.
             */
            for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
               /* Copy the DMA control words to constants. */
               pvr_pds_write_dma_address(constants,
                                         dma_address_constant64,
                                         program->texture_dma_address[kick],
                                         false,
                                         dev_info);

               pvr_pds_write_constant32(constants,
                                        dma_control_constant32,
                                        program->texture_dma_control[kick]);

               dma_address_constant64 += 2;
               dma_control_constant32 += 1;
            }

            /* Final kick: control word carries the LAST flag. */
            pvr_pds_write_dma_address(constants,
                                      dma_address_constant64,
                                      program->texture_dma_address[kick],
                                      false,
                                      dev_info);

            pvr_pds_write_constant32(
               constants,
               dma_control_constant32,
               program->texture_dma_control[kick] |
                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
         }
      }
   }

   /* Save the data segment pointer and size. */
   program->data_segment = constants;

   /* Minimum temp count is 1. */
   program->temps_used = MAX2(temps_used, 1);
   program->data_size = data_size;

   /* Only the data segment mode has written anything to return past. */
   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
      return (constants + next_constant);
   else
      return NULL;
}
3386
3387 /**
3388 * Generates generic DOUTC PDS program.
3389 *
3390 * \param program Pointer to the PDS kick USC.
3391 * \param buffer Pointer to the buffer for the program.
3392 * \param gen_mode Either code and data can be generated, or sizes only updated.
3393 * \returns Pointer to just beyond the buffer for the code or program segment.
3394 */
pvr_pds_generate_doutc(struct pvr_pds_fence_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)3395 uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
3396 uint32_t *restrict buffer,
3397 enum pvr_pds_generate_mode gen_mode)
3398 {
3399 uint32_t constant = 0;
3400
3401 /* Automatically get a data size of 1x 128bit chunks. */
3402 uint32_t data_size = 0, code_size = 0;
3403
3404 /* Setup the data part. */
3405 uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3406 uint32_t *instruction = buffer;
3407 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3408 * dwords.
3409 */
3410
3411 /* Update the program sizes. */
3412 program->data_size = data_size;
3413 program->code_size = code_size;
3414 program->data_segment = constants;
3415
3416 if (gen_mode == PDS_GENERATE_SIZES)
3417 return NULL;
3418
3419 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3420 /* Copy the USC task control words to constants. */
3421
3422 constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
3423 pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
3424 * Src0
3425 */
3426
3427 uint32_t control_word_constant =
3428 pvr_pds_get_constants(&next_constant, 2, &data_size);
3429 pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
3430 * Src1
3431 */
3432
3433 program->data_size = data_size;
3434 buffer += data_size;
3435
3436 return buffer;
3437 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3438 *instruction++ = pvr_pds_inst_encode_doutc(
3439 /* cc */ 0,
3440 /* END */ 0);
3441
3442 code_size++;
3443
3444 /* End the program. */
3445 *instruction++ = pvr_pds_inst_encode_halt(0);
3446 code_size++;
3447
3448 program->code_size = code_size;
3449 }
3450
3451 return instruction;
3452 }
3453
3454 /**
3455 * Generates generic kick DOUTU PDS program in a single data+code block.
3456 *
3457 * \param control Pointer to the PDS kick USC.
3458 * \param buffer Pointer to the buffer for the program.
3459 * \param gen_mode Either code and data can be generated or sizes only updated.
3460 * \param dev_info PVR device information structure.
3461 * \returns Pointer to just beyond the buffer for the code or program segment.
3462 */
pvr_pds_generate_doutw(struct pvr_pds_doutw_control * restrict control,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3463 uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
3464 uint32_t *restrict buffer,
3465 enum pvr_pds_generate_mode gen_mode,
3466 const struct pvr_device_info *dev_info)
3467 {
3468 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3469 uint32_t doutw;
3470 uint32_t data_size = 0, code_size = 0;
3471 uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3472 uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
3473
3474 /* Assert if buffer is exceeded. */
3475 assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
3476
3477 uint32_t *constants = buffer;
3478 uint32_t *instruction = buffer;
3479
3480 /* Put the constants and control words interleaved in the data region. */
3481 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3482 const_pair++) {
3483 constant[const_pair] =
3484 pvr_pds_get_constants(&next_constant, 2, &data_size);
3485 control_word_constant[const_pair] =
3486 pvr_pds_get_constants(&next_constant, 2, &data_size);
3487 }
3488
3489 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3490 /* Data segment points to start of constants. */
3491 control->data_segment = constants;
3492
3493 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3494 const_pair++) {
3495 pvr_pds_write_constant64(constants,
3496 constant[const_pair],
3497 H32(control->doutw_data[const_pair]),
3498 L32(control->doutw_data[const_pair]));
3499
3500 /* Start loading at offset 0. */
3501 if (control->dest_store == PDS_COMMON_STORE) {
3502 doutw = pvr_pds_encode_doutw_src1(
3503 (2 * const_pair),
3504 PVR_PDS_DOUTW_LOWER64,
3505 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
3506 false,
3507 dev_info);
3508 } else {
3509 doutw = pvr_pds_encode_doutw_src1(
3510 (2 * const_pair),
3511 PVR_PDS_DOUTW_LOWER64,
3512 PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
3513 false,
3514 dev_info);
3515 }
3516
3517 if (const_pair + 1 == control->num_const64) {
3518 /* Set the last flag for the MCU (assume there are no following
3519 * DOUTD's).
3520 */
3521 doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
3522 }
3523 pvr_pds_write_constant64(constants,
3524 control_word_constant[const_pair],
3525 doutw,
3526 0);
3527 }
3528
3529 control->data_size = data_size;
3530 } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
3531 /* Code section. */
3532
3533 for (uint32_t const_pair = 0; const_pair < control->num_const64;
3534 const_pair++) {
3535 /* DOUTW the PDS data to the USC constants. */
3536 *instruction++ = pvr_pds_encode_doutw64(
3537 /* cc */ 0,
3538 /* END */ control->last_instruction &&
3539 (const_pair + 1 == control->num_const64),
3540 /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
3541 * Src1.
3542 */
3543 /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
3544
3545 code_size++;
3546 }
3547
3548 if (control->last_instruction) {
3549 /* End the program. */
3550 *instruction++ = pvr_pds_inst_encode_halt(0);
3551 code_size++;
3552 }
3553
3554 control->code_size = code_size;
3555 }
3556
3557 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3558 return (constants + next_constant);
3559 else
3560 return instruction;
3561 }
3562
3563 /**
3564 * Generates generic kick DOUTU PDS program in a single data+code block.
3565 *
3566 * \param program Pointer to the PDS kick USC.
3567 * \param buffer Pointer to the buffer for the program.
3568 * \param start_next_constant Next constant in data segment. Non-zero if another
3569 * instruction precedes the DOUTU.
3570 * \param cc_enabled If true then the DOUTU is predicated (cc set).
3571 * \param gen_mode Either code and data can be generated or sizes only updated.
3572 * \returns Pointer to just beyond the buffer for the code or program segment.
3573 */
pvr_pds_kick_usc(struct pvr_pds_kickusc_program * restrict program,uint32_t * restrict buffer,uint32_t start_next_constant,bool cc_enabled,enum pvr_pds_generate_mode gen_mode)3574 uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
3575 uint32_t *restrict buffer,
3576 uint32_t start_next_constant,
3577 bool cc_enabled,
3578 enum pvr_pds_generate_mode gen_mode)
3579 {
3580 uint32_t constant = 0;
3581
3582 /* Automatically get a data size of 2 128bit chunks. */
3583 uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
3584 uint32_t code_size = 1; /* Single doutu */
3585 uint32_t dummy_count = 0;
3586
3587 /* Setup the data part. */
3588 uint32_t *constants = buffer; /* Constants placed at front of buffer. */
3589 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
3590 * dwords.
3591 */
3592
3593 /* Update the program sizes. */
3594 program->data_size = data_size;
3595 program->code_size = code_size;
3596 program->data_segment = constants;
3597
3598 if (gen_mode == PDS_GENERATE_SIZES)
3599 return NULL;
3600
3601 if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
3602 gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3603 /* Copy the USC task control words to constants. */
3604
3605 constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
3606
3607 pvr_pds_write_wide_constant(constants,
3608 constant + 0,
3609 program->usc_task_control.src0); /* 64-bit
3610 * Src0.
3611 */
3612 buffer += data_size;
3613
3614 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3615 return buffer;
3616 }
3617
3618 if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
3619 gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
3620 /* Generate the PDS pixel shader code. */
3621
3622 /* Setup the instruction pointer. */
3623 uint32_t *instruction = buffer;
3624
3625 /* Issue the task to the USC.
3626 *
3627 * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
3628 * halt halt
3629 */
3630
3631 *instruction++ = pvr_pds_encode_doutu(
3632 /* cc */ cc_enabled,
3633 /* END */ 1,
3634 /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
3635 * 64-bit Src0
3636 */
3637
3638 /* Return pointer to just after last instruction. */
3639 return instruction;
3640 }
3641
3642 /* Execution should never reach here; keep compiler happy. */
3643 return NULL;
3644 }
3645
pvr_pds_generate_compute_barrier_conditional(uint32_t * buffer,enum pvr_pds_generate_mode gen_mode)3646 uint32_t *pvr_pds_generate_compute_barrier_conditional(
3647 uint32_t *buffer,
3648 enum pvr_pds_generate_mode gen_mode)
3649 {
3650 /* Compute barriers supported. Need to test for coeff sync task. */
3651
3652 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
3653 return buffer; /* No data segment. */
3654
3655 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3656 /* Test whether this is the coefficient update task or not. */
3657 *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3658 */
3659 PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3660 */
3661 PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC
3662 */
3663 1 /* ADDR */);
3664
3665 /* Encode a HALT. */
3666 *buffer++ = pvr_pds_inst_encode_halt(1);
3667
3668 /* Reset the default predicate to IF0. */
3669 *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
3670 */
3671 PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
3672 */
3673 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC
3674 */
3675 1 /* ADDR */);
3676 }
3677
3678 return buffer;
3679 }
3680
3681 /**
3682 * Generates program to kick the USC task to store shared.
3683 *
3684 * \param program Pointer to the PDS shared register.
3685 * \param buffer Pointer to the buffer for the program.
3686 * \param gen_mode Either code and data can be generated or sizes only updated.
3687 * \param dev_info PVR device information structure.
3688 * \returns Pointer to just beyond the buffer for the program.
3689 */
pvr_pds_generate_shared_storing_program(struct pvr_pds_shared_storing_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3690 uint32_t *pvr_pds_generate_shared_storing_program(
3691 struct pvr_pds_shared_storing_program *restrict program,
3692 uint32_t *restrict buffer,
3693 enum pvr_pds_generate_mode gen_mode,
3694 const struct pvr_device_info *dev_info)
3695 {
3696 struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3697 struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3698
3699 if (gen_mode == PDS_GENERATE_SIZES)
3700 return NULL;
3701
3702 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3703 uint32_t *constants = buffer;
3704
3705 constants =
3706 pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info);
3707 program->data_size = doutw_control->data_size;
3708
3709 constants = pvr_pds_kick_usc(kick_usc_program,
3710 constants,
3711 0,
3712 program->cc_enable,
3713 gen_mode);
3714 program->data_size += kick_usc_program->data_size;
3715
3716 return constants;
3717 }
3718
3719 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3720 /* Generate PDS code segment. */
3721 uint32_t *instruction = buffer;
3722
3723 /* doutw vi1, vi0
3724 * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
3725 * emit
3726 */
3727 instruction =
3728 pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info);
3729 program->code_size = doutw_control->code_size;
3730
3731 /* Offset into data segment follows on from doutw data segment. */
3732 instruction = pvr_pds_kick_usc(kick_usc_program,
3733 instruction,
3734 doutw_control->data_size,
3735 program->cc_enable,
3736 gen_mode);
3737 program->code_size += kick_usc_program->code_size;
3738
3739 return instruction;
3740 }
3741
3742 /* Execution should never reach here. */
3743 return NULL;
3744 }
3745
pvr_pds_generate_fence_terminate_program(struct pvr_pds_fence_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3746 uint32_t *pvr_pds_generate_fence_terminate_program(
3747 struct pvr_pds_fence_program *restrict program,
3748 uint32_t *restrict buffer,
3749 enum pvr_pds_generate_mode gen_mode,
3750 const struct pvr_device_info *dev_info)
3751 {
3752 uint32_t data_size = 0;
3753 uint32_t code_size = 0;
3754
3755 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3756 /* Data segment. */
3757 uint32_t *constants, *constants_base;
3758
3759 constants = constants_base = (uint32_t *)buffer;
3760
3761 /* DOUTC sources are not used, but they must be valid. */
3762 pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
3763 data_size += program->data_size;
3764
3765 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3766 /* Append a 64-bit constant with value 1. Used to increment ptemp.
3767 * Return the offset into the data segment.
3768 */
3769 program->fence_constant_word =
3770 pvr_pds_append_constant64(constants_base, 1, &data_size);
3771 }
3772
3773 program->data_size = data_size;
3774 return constants;
3775 }
3776
3777 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3778 /* Code segment. */
3779 uint32_t *instruction = (uint32_t *)buffer;
3780
3781 instruction = pvr_pds_generate_compute_barrier_conditional(
3782 instruction,
3783 PDS_GENERATE_CODE_SEGMENT);
3784 code_size += 3;
3785
3786 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
3787 /* lock */
3788 *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
3789
3790 /* add64 pt[0], pt[0], #1 */
3791 *instruction++ = pvr_pds_inst_encode_add64(
3792 0, /* cc */
3793 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3794 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3795 PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
3796 */
3797 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3798 (program->fence_constant_word >> 1), /* src1 = 1 */
3799 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
3800 * ptemp[0]
3801 */
3802
3803 /* release */
3804 *instruction++ = pvr_pds_inst_encode_release(0); /* cc */
3805
3806 /* cmp pt[0] EQ 0x4 == Number of USC clusters per phantom */
3807 *instruction++ = pvr_pds_inst_encode_cmpi(
3808 0, /* cc */
3809 PVR_ROGUE_PDSINST_COP_EQ,
3810 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
3811 * = ptemp[0]
3812 */
3813 PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
3814
3815 /* bra -1 */
3816 *instruction++ =
3817 pvr_pds_encode_bra(0, /* cc */
3818 1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
3819 */
3820 0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
3821 */
3822 -1); /* bra PC */
3823 code_size += 5;
3824 }
3825
3826 /* DOUTC */
3827 instruction = pvr_pds_generate_doutc(program,
3828 instruction,
3829 PDS_GENERATE_CODE_SEGMENT);
3830 code_size += program->code_size;
3831
3832 program->code_size = code_size;
3833 return instruction;
3834 }
3835
3836 /* Execution should never reach here. */
3837 return NULL;
3838 }
3839
3840 /**
3841 * Generates program to kick the USC task to load shared registers from memory.
3842 *
3843 * \param program Pointer to the PDS shared register.
3844 * \param buffer Pointer to the buffer for the program.
3845 * \param gen_mode Either code and data can be generated or sizes only updated.
3846 * \param dev_info PVR device information struct.
3847 * \returns Pointer to just beyond the buffer for the program.
3848 */
pvr_pds_generate_compute_shared_loading_program(struct pvr_pds_shared_storing_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)3849 uint32_t *pvr_pds_generate_compute_shared_loading_program(
3850 struct pvr_pds_shared_storing_program *restrict program,
3851 uint32_t *restrict buffer,
3852 enum pvr_pds_generate_mode gen_mode,
3853 const struct pvr_device_info *dev_info)
3854 {
3855 struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
3856 struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
3857
3858 uint32_t next_constant;
3859 uint32_t data_size = 0;
3860 uint32_t code_size = 0;
3861
3862 /* This needs to persist to the CODE_SEGMENT call. */
3863 static uint32_t fence_constant_word = 0;
3864 uint64_t zero_constant64 = 0;
3865
3866 if (gen_mode == PDS_GENERATE_SIZES)
3867 return NULL;
3868
3869 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
3870 uint32_t *constants = buffer;
3871
3872 constants = pvr_pds_generate_doutw(doutw_control,
3873 constants,
3874 PDS_GENERATE_DATA_SEGMENT,
3875 dev_info);
3876 data_size += doutw_control->data_size;
3877
3878 constants = pvr_pds_kick_usc(kick_usc_program,
3879 constants,
3880 0,
3881 program->cc_enable,
3882 gen_mode);
3883 data_size += kick_usc_program->data_size;
3884
3885 /* Copy the fence constant value (64-bit). */
3886 next_constant = data_size; /* Assumes data words fully packed. */
3887 fence_constant_word =
3888 pvr_pds_get_constants(&next_constant, 2, &data_size);
3889
3890 /* Encode the fence constant src0 (offset measured from start of data
3891 * buffer). Fence barrier is initialized to zero.
3892 */
3893 pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
3894 /* Update the const size. */
3895 data_size += 2;
3896 constants += 2;
3897
3898 program->data_size = data_size;
3899 return constants;
3900 }
3901
3902 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
3903 /* Generate PDS code segment. */
3904 uint32_t *instruction = buffer;
3905
3906 /* add64 pt0, c0, c0
3907 * IF [2x Phantoms]
3908 * add64 pt1, c0, c0
3909 * st [constant_mem_addr], pt0, 4
3910 * ENDIF
3911 * doutw vi1, vi0
3912 * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
3913 * emit
3914 *
3915 * Zero the persistent temp (SW fence for context switch).
3916 */
3917 *instruction++ = pvr_pds_inst_encode_add64(
3918 0, /* cc */
3919 PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
3920 PVR_ROGUE_PDSINST_MAD_SNA_ADD,
3921 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3922 (fence_constant_word >> 1), /* src0
3923 * = 0
3924 */
3925 PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
3926 (fence_constant_word >> 1), /* src1
3927 * = 0
3928 */
3929 PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
3930 */
3931 code_size++;
3932
3933 instruction = pvr_pds_generate_doutw(doutw_control,
3934 instruction,
3935 PDS_GENERATE_CODE_SEGMENT,
3936 dev_info);
3937 code_size += doutw_control->code_size;
3938
3939 /* Offset into data segment follows on from doutw data segment. */
3940 instruction = pvr_pds_kick_usc(kick_usc_program,
3941 instruction,
3942 doutw_control->data_size,
3943 program->cc_enable,
3944 gen_mode);
3945 code_size += kick_usc_program->code_size;
3946
3947 program->code_size = code_size;
3948 return instruction;
3949 }
3950
3951 /* Execution should never reach here. */
3952 return NULL;
3953 }
3954
3955 /**
3956 * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
3957 * Relies on num_fpu_iterators being initialized for size calculation.
3958 * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
3959 * initialized for program generation.
3960 *
3961 * \param program Pointer to the PDS pixel shader program.
3962 * \param buffer Pointer to the buffer for the program.
3963 * \param gen_mode Either code and data can be generated or sizes only updated.
3964 * \returns Pointer to just beyond the buffer for the program.
3965 */
pvr_pds_coefficient_loading(struct pvr_pds_coeff_loading_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode)3966 uint32_t *pvr_pds_coefficient_loading(
3967 struct pvr_pds_coeff_loading_program *restrict program,
3968 uint32_t *restrict buffer,
3969 enum pvr_pds_generate_mode gen_mode)
3970 {
3971 uint32_t constant;
3972 uint32_t *instruction;
3973 uint32_t total_data_size, code_size;
3974
3975 /* Place constants at the front of the buffer. */
3976 uint32_t *constants = buffer;
3977 /* Start counting constants from 0. */
3978 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
3979
3980 /* Save the data segment pointer and size. */
3981 program->data_segment = constants;
3982
3983 total_data_size = 0;
3984 code_size = 0;
3985
3986 total_data_size += 2 * program->num_fpu_iterators;
3987 code_size += program->num_fpu_iterators;
3988
3989 /* Instructions start where constants finished, but we must take note of
3990 * alignment.
3991 *
3992 * 128-bit boundary = 4 dwords.
3993 */
3994 total_data_size = ALIGN_POT(total_data_size, 4);
3995 if (gen_mode != PDS_GENERATE_SIZES) {
3996 uint32_t data_size = 0;
3997 uint32_t iterator = 0;
3998
3999 instruction = buffer + total_data_size;
4000
4001 while (iterator < program->num_fpu_iterators) {
4002 uint64_t iterator_word;
4003
4004 /* Copy the USC task control words to constants. */
4005 constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
4006
4007 /* Write the first iterator. */
4008 iterator_word =
4009 (uint64_t)program->FPU_iterators[iterator]
4010 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
4011
4012 /* Write the destination. */
4013 iterator_word |=
4014 (uint64_t)program->destination[iterator++]
4015 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
4016
4017 /* If this is the last DOUTI word the "Last Issue" bit should be
4018 * set.
4019 */
4020 if (iterator >= program->num_fpu_iterators) {
4021 iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
4022 }
4023
4024 /* Write the word to the buffer. */
4025 pvr_pds_write_wide_constant(constants,
4026 constant,
4027 iterator_word); /* 64-bit
4028 Src0
4029 */
4030
4031 /* Write the DOUT instruction. */
4032 *instruction++ = pvr_pds_encode_douti(
4033 /* cc */ 0,
4034 /* END */ 0,
4035 /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
4036 }
4037
4038 /* Update the last DOUTI instruction to have the END flag set. */
4039 *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
4040 } else {
4041 instruction = NULL;
4042 }
4043
4044 /* Update the data size and code size. Minimum temp count is 1. */
4045 program->temps_used = 1;
4046 program->data_size = total_data_size;
4047 program->code_size = code_size;
4048
4049 return instruction;
4050 }
4051
4052 /**
4053 * Generate a single ld/st instruction. This can correspond to one or more
4054 * real ld/st instructions based on the value of count.
4055 *
4056 * \param ld true to generate load, false to generate store.
4057 * \param control Cache mode control.
4058 * \param temp_index Dest temp for load/source temp for store, in 32bits
4059 * register index.
4060 * \param address Source for load/dest for store in bytes.
4061 * \param count Number of dwords for load/store.
4062 * \param next_constant
4063 * \param total_data_size
4064 * \param total_code_size
4065 * \param buffer Pointer to the buffer for the program.
4066 * \param data_fence Issue data fence.
4067 * \param gen_mode Either code and data can be generated or sizes only updated.
4068 * \param dev_info PVR device information structure.
4069 * \returns Pointer to just beyond the buffer for the program.
4070 */
pvr_pds_generate_single_ldst_instruction(bool ld,const struct pvr_pds_ldst_control * control,uint32_t temp_index,uint64_t address,uint32_t count,uint32_t * next_constant,uint32_t * total_data_size,uint32_t * total_code_size,uint32_t * restrict buffer,bool data_fence,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4071 uint32_t *pvr_pds_generate_single_ldst_instruction(
4072 bool ld,
4073 const struct pvr_pds_ldst_control *control,
4074 uint32_t temp_index,
4075 uint64_t address,
4076 uint32_t count,
4077 uint32_t *next_constant,
4078 uint32_t *total_data_size,
4079 uint32_t *total_code_size,
4080 uint32_t *restrict buffer,
4081 bool data_fence,
4082 enum pvr_pds_generate_mode gen_mode,
4083 const struct pvr_device_info *dev_info)
4084 {
4085 /* A single ld/ST here does NOT actually correspond to a single ld/ST
4086 * instruction, but may needs multiple ld/ST instructions because each ld/ST
4087 * instruction can only ld/ST a restricted max number of dwords which may
4088 * less than count passed here.
4089 */
4090
4091 uint32_t num_inst;
4092 uint32_t constant;
4093
4094 if (ld) {
4095 /* ld must operate on 64bits unit, and it needs to load from and to 128
4096 * bits aligned. Apart from the last ld, all the other need to ld 2x(x =
4097 * 1, 2, ...) times 64bits unit.
4098 */
4099 uint32_t per_inst_count = 0;
4100 uint32_t last_inst_count;
4101
4102 assert((gen_mode == PDS_GENERATE_SIZES) ||
4103 (((count % 2) == 0) && ((address % 16) == 0) &&
4104 (temp_index % 2) == 0));
4105
4106 count >>= 1;
4107 temp_index >>= 1;
4108
4109 /* Found out how many ld instructions are needed and ld size for the all
4110 * possible ld instructions.
4111 */
4112 if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
4113 num_inst = 1;
4114 last_inst_count = count;
4115 } else {
4116 per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
4117 if ((per_inst_count % 2) != 0)
4118 per_inst_count -= 1;
4119
4120 num_inst = count / per_inst_count;
4121 last_inst_count = count - per_inst_count * num_inst;
4122 num_inst += 1;
4123 }
4124
4125 /* Generate all the instructions. */
4126 for (uint32_t i = 0; i < num_inst; i++) {
4127 if ((i == (num_inst - 1)) && (last_inst_count == 0))
4128 break;
4129
4130 /* A single load instruction. */
4131 constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4132
4133 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4134 uint64_t ld_src0 = 0;
4135
4136 ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
4137 << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
4138 ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
4139 : per_inst_count) &
4140 PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
4141 << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
4142 ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
4143 << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
4144
4145 if (!control) {
4146 ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
4147
4148 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
4149 ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
4150
4151 } else {
4152 ld_src0 |= control->cache_control_const;
4153 }
4154
4155 /* Write it to the constant. */
4156 pvr_pds_write_constant64(buffer,
4157 constant,
4158 (uint32_t)(ld_src0),
4159 (uint32_t)(ld_src0 >> 32));
4160
4161 /* Adjust value for next ld instruction. */
4162 temp_index += per_inst_count;
4163 address += (((uint64_t)(per_inst_count)) << 3);
4164 }
4165
4166 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4167 *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
4168
4169 if (data_fence)
4170 *buffer++ = pvr_pds_inst_encode_wdf(0);
4171 }
4172 }
4173 } else {
4174 /* ST needs source memory address to be 32bits aligned. */
4175 assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
4176
4177 /* Found out how many ST instructions are needed, each ST can only store
4178 * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
4179 */
4180 num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
4181 num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
4182
4183 /* Generate all the instructions. */
4184 for (uint32_t i = 0; i < num_inst; i++) {
4185 /* A single store instruction. */
4186 constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
4187
4188 if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
4189 uint32_t per_inst_count =
4190 (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
4191 ? count
4192 : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
4193 uint64_t st_src0 = 0;
4194
4195 st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
4196 << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
4197 st_src0 |=
4198 (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
4199 << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
4200 st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
4201 << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
4202
4203 if (!control) {
4204 st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
4205
4206 if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
4207 st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
4208 }
4209
4210 } else {
4211 st_src0 |= control->cache_control_const;
4212 }
4213
4214 /* Write it to the constant. */
4215 pvr_pds_write_constant64(buffer,
4216 constant,
4217 (uint32_t)(st_src0),
4218 (uint32_t)(st_src0 >> 32));
4219
4220 /* Adjust value for next ST instruction. */
4221 temp_index += per_inst_count;
4222 count -= per_inst_count;
4223 address += (((uint64_t)(per_inst_count)) << 2);
4224 }
4225
4226 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4227 *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
4228
4229 if (data_fence)
4230 *buffer++ = pvr_pds_inst_encode_wdf(0);
4231 }
4232 }
4233 }
4234
4235 (*total_code_size) += num_inst;
4236 if (data_fence)
4237 (*total_code_size) += num_inst;
4238
4239 if (gen_mode != PDS_GENERATE_SIZES)
4240 return buffer;
4241 return NULL;
4242 }
4243
4244 /**
4245 * Generate programs used to prepare stream out, i.e., clear stream out buffer
4246 * overflow flags and update Persistent temps by a ld instruction.
4247 *
4248 * This must be used in PPP state update.
4249 *
4250 * \param program Pointer to the stream out program.
4251 * \param buffer Pointer to the buffer for the program.
4252 * \param store_mode If true then the data is stored to memory. If false then
4253 * the data is loaded from memory.
4254 * \param gen_mode Either code and data can be generated or sizes only updated.
4255 * \param dev_info PVR device information structure.
4256 * \returns Pointer to just beyond the buffer for the program.
4257 */
pvr_pds_generate_stream_out_init_program(struct pvr_pds_stream_out_init_program * restrict program,uint32_t * restrict buffer,bool store_mode,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4258 uint32_t *pvr_pds_generate_stream_out_init_program(
4259 struct pvr_pds_stream_out_init_program *restrict program,
4260 uint32_t *restrict buffer,
4261 bool store_mode,
4262 enum pvr_pds_generate_mode gen_mode,
4263 const struct pvr_device_info *dev_info)
4264 {
4265 uint32_t total_data_size = 0;
4266 uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
4267
4268 /* Start counting constants from 0. */
4269 uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4270
4271 uint32_t total_code_size = 1;
4272
4273 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4274 /* We only need to clear global stream out predicate, other predicates
4275 * are not used during the stream out buffer overflow test.
4276 */
4277 *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
4278 }
4279
4280 for (uint32_t index = 0; index < program->num_buffers; index++) {
4281 if (program->dev_address_for_buffer_data[index] != 0) {
4282 /* Generate load/store program to load/store persistent temps. */
4283
4284 /* NOTE: store_mode == true case should be handled by
4285 * StreamOutTerminate.
4286 */
4287 buffer = pvr_pds_generate_single_ldst_instruction(
4288 !store_mode,
4289 NULL,
4290 PTDst,
4291 program->dev_address_for_buffer_data[index],
4292 program->pds_buffer_data_size[index],
4293 &next_constant,
4294 &total_data_size,
4295 &total_code_size,
4296 buffer,
4297 false,
4298 gen_mode,
4299 dev_info);
4300 }
4301
4302 PTDst += program->pds_buffer_data_size[index];
4303 }
4304
4305 total_code_size += 2;
4306
4307 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4308 /* We need to fence the loading. */
4309 *buffer++ = pvr_pds_inst_encode_wdf(0);
4310 *buffer++ = pvr_pds_inst_encode_halt(0);
4311 }
4312
4313 /* Save size information to program */
4314 program->stream_out_init_pds_data_size =
4315 ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4316 /* PDS program code size. */
4317 program->stream_out_init_pds_code_size = total_code_size;
4318
4319 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4320 return buffer + program->stream_out_init_pds_data_size;
4321 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4322 return buffer;
4323
4324 return NULL;
4325 }
4326
4327 /**
4328 * Generate stream out terminate program for stream out.
4329 *
4330 * If pds_persistent_temp_size_to_store is 0, the final primitive written value
4331 * will be stored.
4332 *
4333 * If pds_persistent_temp_size_to_store is non 0, the value of persistent temps
4334 * will be stored into memory.
4335 *
4336 * The stream out terminate program is used to update the PPP state and the data
4337 * and code section cannot be separate.
4338 *
4339 * \param program Pointer to the stream out program.
4340 * \param buffer Pointer to the buffer for the program.
4341 * \param gen_mode Either code and data can be generated or sizes only updated.
4342 * \param dev_info PVR device info structure.
4343 * \returns Pointer to just beyond the buffer for the program.
4344 */
pvr_pds_generate_stream_out_terminate_program(struct pvr_pds_stream_out_terminate_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4345 uint32_t *pvr_pds_generate_stream_out_terminate_program(
4346 struct pvr_pds_stream_out_terminate_program *restrict program,
4347 uint32_t *restrict buffer,
4348 enum pvr_pds_generate_mode gen_mode,
4349 const struct pvr_device_info *dev_info)
4350 {
4351 uint32_t next_constant;
4352 uint32_t total_data_size = 0, total_code_size = 0;
4353
4354 /* Start counting constants from 0. */
4355 next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
4356
4357 /* Generate store program to store persistent temps. */
4358 buffer = pvr_pds_generate_single_ldst_instruction(
4359 false,
4360 NULL,
4361 PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
4362 program->dev_address_for_storing_persistent_temp,
4363 program->pds_persistent_temp_size_to_store,
4364 &next_constant,
4365 &total_data_size,
4366 &total_code_size,
4367 buffer,
4368 false,
4369 gen_mode,
4370 dev_info);
4371
4372 total_code_size += 2;
4373 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4374 *buffer++ = pvr_pds_inst_encode_wdf(0);
4375 *buffer++ = pvr_pds_inst_encode_halt(0);
4376 }
4377
4378 /* Save size information to program. */
4379 program->stream_out_terminate_pds_data_size =
4380 ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
4381 /* PDS program code size. */
4382 program->stream_out_terminate_pds_code_size = total_code_size;
4383
4384 if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
4385 return buffer + program->stream_out_terminate_pds_data_size;
4386 else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
4387 return buffer;
4388
4389 return NULL;
4390 }
4391
4392 /* DrawArrays works in several steps:
4393 *
4394 * 1) load data from draw_indirect buffer
4395 * 2) tweak data to match hardware formats
4396 * 3) write data to indexblock
4397 * 4) signal the VDM to continue
4398 *
4399 * This is complicated by HW limitations on alignment, as well as a HWBRN.
4400 *
4401 * 1) Load data.
4402 * Loads _must_ be 128-bit aligned. Because there is no such limitation in the
4403 * spec we must deal with this by choosing an appropriate earlier address and
4404 * loading enough dwords that we load the entirety of the buffer.
4405 *
4406 * if addr & 0xf:
4407 * load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
 * data = tmp[(addr & 0xf) >> 2]...
4409 * else
4410 * load [addr] 4 dwords -> tmp[0, 1, 2, 3]
4411 * data = tmp[0]...
4412 *
4413 *
4414 * 2) Tweak data.
4415 * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
4416 * the VDM control stream. We must subtract 1 from the loaded primCount.
4417 *
4418 * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
4419 * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
4420 * into another tmp that has the correct alignment. Note: this is only required
4421 * when data = tmp[even], as primCount is data+1:
4422 *
4423 * if data = tmp[even]:
4424 * primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
4425 * else:
4426 * primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
4427 *
4428 * This boils down to:
4429 *
4430 * primCount = data[1]
4431 * primCountSrc = data[1]
4432 * if brn_present && (data is even):
4433 * mov scratch, primCount
4434 * primCountSrc = scratch
4435 * endif
4436 * sub primCount, primCountSrc, 1
4437 *
4438 * 3) Store Data.
4439 * Write the now-tweaked data over the top of the indexblock.
4440 * To ensure the write completes before the VDM re-reads the data, we must cause
4441 * a data hazard by doing a dummy (dummy meaning we don't care about the
4442 * returned data) load from the same addresses. Again, because the ld must
4443 * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
4444 * index block is 128-bit aligned. This is the client driver's responsibility.
4445 *
4446 * st data[0, 1, 2] -> (idxblock + 4)
4447 * load [idxblock] 4 dwords
4448 *
4449 * 4) Signal the VDM
4450 * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
4451 * where it is currently fenced on a dummy idxblock that has been inserted by
4452 * the driver.
4453 */
4454
4455 #include "pvr_draw_indirect_arrays0.h"
4456 #include "pvr_draw_indirect_arrays1.h"
4457 #include "pvr_draw_indirect_arrays2.h"
4458 #include "pvr_draw_indirect_arrays3.h"
4459
4460 #include "pvr_draw_indirect_arrays_base_instance0.h"
4461 #include "pvr_draw_indirect_arrays_base_instance1.h"
4462 #include "pvr_draw_indirect_arrays_base_instance2.h"
4463 #include "pvr_draw_indirect_arrays_base_instance3.h"
4464
4465 #include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
4466 #include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
4467 #include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
4468 #include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
4469
/* Evaluates to the LD SRC0 SLC mode bits for the given device: the CACHED
 * mode when device->features.has_slc_mcu_cache_controls is set, the BYPASS
 * mode otherwise.
 *
 * NOTE(review): not referenced directly in this part of the file; presumably
 * consumed by the pvr_write_*_di_data helpers from the pvr_draw_indirect_*
 * headers -- confirm against those headers.
 */
#define ENABLE_SLC_MCU_CACHE_CONTROLS(device)        \
   ((device)->features.has_slc_mcu_cache_controls    \
       ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \
       : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS)
4474
pvr_pds_generate_draw_arrays_indirect(struct pvr_pds_drawindirect_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4475 void pvr_pds_generate_draw_arrays_indirect(
4476 struct pvr_pds_drawindirect_program *restrict program,
4477 uint32_t *restrict buffer,
4478 enum pvr_pds_generate_mode gen_mode,
4479 const struct pvr_device_info *dev_info)
4480 {
4481 if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4482 (gen_mode == PDS_GENERATE_SIZES)) {
4483 const struct pvr_psc_program_output *psc_program = NULL;
4484 switch ((program->arg_buffer >> 2) % 4) {
4485 case 0:
4486 if (program->support_base_instance) {
4487 if (program->increment_draw_id) {
4488 psc_program =
4489 &pvr_draw_indirect_arrays_base_instance_drawid0_program;
4490 } else {
4491 psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
4492 }
4493 } else {
4494 psc_program = &pvr_draw_indirect_arrays0_program;
4495 }
4496 break;
4497 case 1:
4498 if (program->support_base_instance) {
4499 if (program->increment_draw_id) {
4500 psc_program =
4501 &pvr_draw_indirect_arrays_base_instance_drawid1_program;
4502 } else {
4503 psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
4504 }
4505 } else {
4506 psc_program = &pvr_draw_indirect_arrays1_program;
4507 }
4508 break;
4509 case 2:
4510 if (program->support_base_instance) {
4511 if (program->increment_draw_id) {
4512 psc_program =
4513 &pvr_draw_indirect_arrays_base_instance_drawid2_program;
4514 } else {
4515 psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
4516 }
4517 } else {
4518 psc_program = &pvr_draw_indirect_arrays2_program;
4519 }
4520 break;
4521 case 3:
4522 if (program->support_base_instance) {
4523 if (program->increment_draw_id) {
4524 psc_program =
4525 &pvr_draw_indirect_arrays_base_instance_drawid3_program;
4526 } else {
4527 psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
4528 }
4529 } else {
4530 psc_program = &pvr_draw_indirect_arrays3_program;
4531 }
4532 break;
4533 }
4534
4535 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4536 memcpy(buffer,
4537 psc_program->code,
4538 psc_program->code_size * sizeof(uint32_t));
4539 #if defined(DUMP_PDS)
4540 for (uint32_t i = 0; i < psc_program->code_size; i++)
4541 PVR_PDS_PRINT_INST(buffer[i]);
4542 #endif
4543 }
4544
4545 program->program = *psc_program;
4546 } else {
4547 switch ((program->arg_buffer >> 2) % 4) {
4548 case 0:
4549 if (program->support_base_instance) {
4550 if (program->increment_draw_id) {
4551 pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
4552 buffer,
4553 program->arg_buffer & ~0xfull,
4554 dev_info);
4555 pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
4556 buffer,
4557 program->index_list_addr_buffer + 4);
4558 pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
4559 buffer,
4560 program->index_list_addr_buffer);
4561 pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
4562 buffer,
4563 program->num_views);
4564 pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
4565 buffer);
4566 } else {
4567 pvr_write_draw_indirect_arrays_base_instance0_di_data(
4568 buffer,
4569 program->arg_buffer & ~0xfull,
4570 dev_info);
4571 pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
4572 buffer,
4573 program->index_list_addr_buffer + 4);
4574 pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
4575 buffer,
4576 program->index_list_addr_buffer);
4577 pvr_write_draw_indirect_arrays_base_instance0_num_views(
4578 buffer,
4579 program->num_views);
4580 pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
4581 }
4582 } else {
4583 pvr_write_draw_indirect_arrays0_di_data(buffer,
4584 program->arg_buffer &
4585 ~0xfull,
4586 dev_info);
4587 pvr_write_draw_indirect_arrays0_write_vdm(
4588 buffer,
4589 program->index_list_addr_buffer + 4);
4590 pvr_write_draw_indirect_arrays0_flush_vdm(
4591 buffer,
4592 program->index_list_addr_buffer);
4593 pvr_write_draw_indirect_arrays0_num_views(buffer,
4594 program->num_views);
4595 pvr_write_draw_indirect_arrays0_immediates(buffer);
4596 }
4597 break;
4598 case 1:
4599 if (program->support_base_instance) {
4600 if (program->increment_draw_id) {
4601 pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
4602 buffer,
4603 program->arg_buffer & ~0xfull,
4604 dev_info);
4605 pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
4606 buffer,
4607 program->index_list_addr_buffer + 4);
4608 pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
4609 buffer,
4610 program->index_list_addr_buffer);
4611 pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
4612 buffer,
4613 program->num_views);
4614 pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
4615 buffer);
4616 } else {
4617 pvr_write_draw_indirect_arrays_base_instance1_di_data(
4618 buffer,
4619 program->arg_buffer & ~0xfull,
4620 dev_info);
4621 pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
4622 buffer,
4623 program->index_list_addr_buffer + 4);
4624 pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
4625 buffer,
4626 program->index_list_addr_buffer);
4627 pvr_write_draw_indirect_arrays_base_instance1_num_views(
4628 buffer,
4629 program->num_views);
4630 pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
4631 }
4632 } else {
4633 pvr_write_draw_indirect_arrays1_di_data(buffer,
4634 program->arg_buffer &
4635 ~0xfull,
4636 dev_info);
4637 pvr_write_draw_indirect_arrays1_write_vdm(
4638 buffer,
4639 program->index_list_addr_buffer + 4);
4640 pvr_write_draw_indirect_arrays1_flush_vdm(
4641 buffer,
4642 program->index_list_addr_buffer);
4643 pvr_write_draw_indirect_arrays1_num_views(buffer,
4644 program->num_views);
4645 pvr_write_draw_indirect_arrays1_immediates(buffer);
4646 }
4647 break;
4648 case 2:
4649 if (program->support_base_instance) {
4650 if (program->increment_draw_id) {
4651 pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
4652 buffer,
4653 program->arg_buffer & ~0xfull,
4654 dev_info);
4655 pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
4656 buffer,
4657 program->index_list_addr_buffer + 4);
4658 pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
4659 buffer,
4660 program->index_list_addr_buffer);
4661 pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
4662 buffer,
4663 program->num_views);
4664 pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
4665 buffer);
4666 } else {
4667 pvr_write_draw_indirect_arrays_base_instance2_di_data(
4668 buffer,
4669 program->arg_buffer & ~0xfull,
4670 dev_info);
4671 pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
4672 buffer,
4673 program->index_list_addr_buffer + 4);
4674 pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
4675 buffer,
4676 program->index_list_addr_buffer);
4677 pvr_write_draw_indirect_arrays_base_instance2_num_views(
4678 buffer,
4679 program->num_views);
4680 pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
4681 }
4682 } else {
4683 pvr_write_draw_indirect_arrays2_di_data(buffer,
4684 program->arg_buffer &
4685 ~0xfull,
4686 dev_info);
4687 pvr_write_draw_indirect_arrays2_write_vdm(
4688 buffer,
4689 program->index_list_addr_buffer + 4);
4690 pvr_write_draw_indirect_arrays2_flush_vdm(
4691 buffer,
4692 program->index_list_addr_buffer);
4693 pvr_write_draw_indirect_arrays2_num_views(buffer,
4694 program->num_views);
4695 pvr_write_draw_indirect_arrays2_immediates(buffer);
4696 }
4697 break;
4698 case 3:
4699 if (program->support_base_instance) {
4700 if (program->increment_draw_id) {
4701 pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
4702 buffer,
4703 program->arg_buffer & ~0xfull,
4704 dev_info);
4705 pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
4706 buffer,
4707 program->index_list_addr_buffer + 4);
4708 pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
4709 buffer,
4710 program->index_list_addr_buffer);
4711 pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
4712 buffer,
4713 program->num_views);
4714 pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
4715 buffer);
4716 } else {
4717 pvr_write_draw_indirect_arrays_base_instance3_di_data(
4718 buffer,
4719 program->arg_buffer & ~0xfull,
4720 dev_info);
4721 pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
4722 buffer,
4723 program->index_list_addr_buffer + 4);
4724 pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
4725 buffer,
4726 program->index_list_addr_buffer);
4727 pvr_write_draw_indirect_arrays_base_instance3_num_views(
4728 buffer,
4729 program->num_views);
4730 pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
4731 }
4732 } else {
4733 pvr_write_draw_indirect_arrays3_di_data(buffer,
4734 program->arg_buffer &
4735 ~0xfull,
4736 dev_info);
4737 pvr_write_draw_indirect_arrays3_write_vdm(
4738 buffer,
4739 program->index_list_addr_buffer + 4);
4740 pvr_write_draw_indirect_arrays3_flush_vdm(
4741 buffer,
4742 program->index_list_addr_buffer);
4743 pvr_write_draw_indirect_arrays3_num_views(buffer,
4744 program->num_views);
4745 pvr_write_draw_indirect_arrays3_immediates(buffer);
4746 }
4747 break;
4748 }
4749 }
4750 }
4751
4752 #include "pvr_draw_indirect_elements0.h"
4753 #include "pvr_draw_indirect_elements1.h"
4754 #include "pvr_draw_indirect_elements2.h"
4755 #include "pvr_draw_indirect_elements3.h"
4756 #include "pvr_draw_indirect_elements_base_instance0.h"
4757 #include "pvr_draw_indirect_elements_base_instance1.h"
4758 #include "pvr_draw_indirect_elements_base_instance2.h"
4759 #include "pvr_draw_indirect_elements_base_instance3.h"
4760 #include "pvr_draw_indirect_elements_base_instance_drawid0.h"
4761 #include "pvr_draw_indirect_elements_base_instance_drawid1.h"
4762 #include "pvr_draw_indirect_elements_base_instance_drawid2.h"
4763 #include "pvr_draw_indirect_elements_base_instance_drawid3.h"
4764
pvr_pds_generate_draw_elements_indirect(struct pvr_pds_drawindirect_program * restrict program,uint32_t * restrict buffer,enum pvr_pds_generate_mode gen_mode,const struct pvr_device_info * dev_info)4765 void pvr_pds_generate_draw_elements_indirect(
4766 struct pvr_pds_drawindirect_program *restrict program,
4767 uint32_t *restrict buffer,
4768 enum pvr_pds_generate_mode gen_mode,
4769 const struct pvr_device_info *dev_info)
4770 {
4771 if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
4772 (gen_mode == PDS_GENERATE_SIZES)) {
4773 const struct pvr_psc_program_output *psc_program = NULL;
4774 switch ((program->arg_buffer >> 2) % 4) {
4775 case 0:
4776 if (program->support_base_instance) {
4777 if (program->increment_draw_id) {
4778 psc_program =
4779 &pvr_draw_indirect_elements_base_instance_drawid0_program;
4780 } else {
4781 psc_program = &pvr_draw_indirect_elements_base_instance0_program;
4782 }
4783 } else {
4784 psc_program = &pvr_draw_indirect_elements0_program;
4785 }
4786 break;
4787 case 1:
4788 if (program->support_base_instance) {
4789 if (program->increment_draw_id) {
4790 psc_program =
4791 &pvr_draw_indirect_elements_base_instance_drawid1_program;
4792 } else {
4793 psc_program = &pvr_draw_indirect_elements_base_instance1_program;
4794 }
4795 } else {
4796 psc_program = &pvr_draw_indirect_elements1_program;
4797 }
4798 break;
4799 case 2:
4800 if (program->support_base_instance) {
4801 if (program->increment_draw_id) {
4802 psc_program =
4803 &pvr_draw_indirect_elements_base_instance_drawid2_program;
4804 } else {
4805 psc_program = &pvr_draw_indirect_elements_base_instance2_program;
4806 }
4807 } else {
4808 psc_program = &pvr_draw_indirect_elements2_program;
4809 }
4810 break;
4811 case 3:
4812 if (program->support_base_instance) {
4813 if (program->increment_draw_id) {
4814 psc_program =
4815 &pvr_draw_indirect_elements_base_instance_drawid3_program;
4816 } else {
4817 psc_program = &pvr_draw_indirect_elements_base_instance3_program;
4818 }
4819 } else {
4820 psc_program = &pvr_draw_indirect_elements3_program;
4821 }
4822 break;
4823 }
4824
4825 if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
4826 memcpy(buffer,
4827 psc_program->code,
4828 psc_program->code_size * sizeof(uint32_t));
4829
4830 #if defined(DUMP_PDS)
4831 for (uint32_t i = 0; i < psc_program->code_size; i++)
4832 PVR_PDS_PRINT_INST(buffer[i]);
4833 #endif
4834 }
4835
4836 program->program = *psc_program;
4837 } else {
4838 switch ((program->arg_buffer >> 2) % 4) {
4839 case 0:
4840 if (program->support_base_instance) {
4841 if (program->increment_draw_id) {
4842 pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
4843 buffer,
4844 program->arg_buffer & ~0xfull,
4845 dev_info);
4846 pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
4847 buffer,
4848 program->index_list_addr_buffer);
4849 pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
4850 buffer,
4851 program->index_list_addr_buffer);
4852 pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
4853 buffer,
4854 program->num_views);
4855 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
4856 buffer,
4857 program->index_stride);
4858 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
4859 buffer,
4860 program->index_buffer);
4861 pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
4862 buffer,
4863 program->index_block_header);
4864 pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
4865 buffer);
4866 } else {
4867 pvr_write_draw_indirect_elements_base_instance0_di_data(
4868 buffer,
4869 program->arg_buffer & ~0xfull,
4870 dev_info);
4871 pvr_write_draw_indirect_elements_base_instance0_write_vdm(
4872 buffer,
4873 program->index_list_addr_buffer);
4874 pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
4875 buffer,
4876 program->index_list_addr_buffer);
4877 pvr_write_draw_indirect_elements_base_instance0_num_views(
4878 buffer,
4879 program->num_views);
4880 pvr_write_draw_indirect_elements_base_instance0_idx_stride(
4881 buffer,
4882 program->index_stride);
4883 pvr_write_draw_indirect_elements_base_instance0_idx_base(
4884 buffer,
4885 program->index_buffer);
4886 pvr_write_draw_indirect_elements_base_instance0_idx_header(
4887 buffer,
4888 program->index_block_header);
4889 pvr_write_draw_indirect_elements_base_instance0_immediates(
4890 buffer);
4891 }
4892 } else {
4893 pvr_write_draw_indirect_elements0_di_data(buffer,
4894 program->arg_buffer &
4895 ~0xfull,
4896 dev_info);
4897 pvr_write_draw_indirect_elements0_write_vdm(
4898 buffer,
4899 program->index_list_addr_buffer);
4900 pvr_write_draw_indirect_elements0_flush_vdm(
4901 buffer,
4902 program->index_list_addr_buffer);
4903 pvr_write_draw_indirect_elements0_num_views(buffer,
4904 program->num_views);
4905 pvr_write_draw_indirect_elements0_idx_stride(buffer,
4906 program->index_stride);
4907 pvr_write_draw_indirect_elements0_idx_base(buffer,
4908 program->index_buffer);
4909 pvr_write_draw_indirect_elements0_idx_header(
4910 buffer,
4911 program->index_block_header);
4912 pvr_write_draw_indirect_elements0_immediates(buffer);
4913 }
4914 break;
4915 case 1:
4916 if (program->support_base_instance) {
4917 if (program->increment_draw_id) {
4918 pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
4919 buffer,
4920 program->arg_buffer & ~0xfull,
4921 dev_info);
4922 pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
4923 buffer,
4924 program->index_list_addr_buffer);
4925 pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
4926 buffer,
4927 program->index_list_addr_buffer);
4928 pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
4929 buffer,
4930 program->num_views);
4931 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
4932 buffer,
4933 program->index_stride);
4934 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
4935 buffer,
4936 program->index_buffer);
4937 pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
4938 buffer,
4939 program->index_block_header);
4940 pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
4941 buffer);
4942 } else {
4943 pvr_write_draw_indirect_elements_base_instance1_di_data(
4944 buffer,
4945 program->arg_buffer & ~0xfull,
4946 dev_info);
4947 pvr_write_draw_indirect_elements_base_instance1_write_vdm(
4948 buffer,
4949 program->index_list_addr_buffer);
4950 pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
4951 buffer,
4952 program->index_list_addr_buffer);
4953 pvr_write_draw_indirect_elements_base_instance1_num_views(
4954 buffer,
4955 program->num_views);
4956 pvr_write_draw_indirect_elements_base_instance1_idx_stride(
4957 buffer,
4958 program->index_stride);
4959 pvr_write_draw_indirect_elements_base_instance1_idx_base(
4960 buffer,
4961 program->index_buffer);
4962 pvr_write_draw_indirect_elements_base_instance1_idx_header(
4963 buffer,
4964 program->index_block_header);
4965 pvr_write_draw_indirect_elements_base_instance1_immediates(
4966 buffer);
4967 }
4968 } else {
4969 pvr_write_draw_indirect_elements1_di_data(buffer,
4970 program->arg_buffer &
4971 ~0xfull,
4972 dev_info);
4973 pvr_write_draw_indirect_elements1_write_vdm(
4974 buffer,
4975 program->index_list_addr_buffer);
4976 pvr_write_draw_indirect_elements1_flush_vdm(
4977 buffer,
4978 program->index_list_addr_buffer);
4979 pvr_write_draw_indirect_elements1_num_views(buffer,
4980 program->num_views);
4981 pvr_write_draw_indirect_elements1_idx_stride(buffer,
4982 program->index_stride);
4983 pvr_write_draw_indirect_elements1_idx_base(buffer,
4984 program->index_buffer);
4985 pvr_write_draw_indirect_elements1_idx_header(
4986 buffer,
4987 program->index_block_header);
4988 pvr_write_draw_indirect_elements1_immediates(buffer);
4989 }
4990 break;
4991 case 2:
4992 if (program->support_base_instance) {
4993 if (program->increment_draw_id) {
4994 pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
4995 buffer,
4996 program->arg_buffer & ~0xfull,
4997 dev_info);
4998 pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
4999 buffer,
5000 program->index_list_addr_buffer);
5001 pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
5002 buffer,
5003 program->index_list_addr_buffer);
5004 pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
5005 buffer,
5006 program->num_views);
5007 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
5008 buffer,
5009 program->index_stride);
5010 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
5011 buffer,
5012 program->index_buffer);
5013 pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
5014 buffer,
5015 program->index_block_header);
5016 pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
5017 buffer);
5018 } else {
5019 pvr_write_draw_indirect_elements_base_instance2_di_data(
5020 buffer,
5021 program->arg_buffer & ~0xfull,
5022 dev_info);
5023 pvr_write_draw_indirect_elements_base_instance2_write_vdm(
5024 buffer,
5025 program->index_list_addr_buffer);
5026 pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
5027 buffer,
5028 program->index_list_addr_buffer);
5029 pvr_write_draw_indirect_elements_base_instance2_num_views(
5030 buffer,
5031 program->num_views);
5032 pvr_write_draw_indirect_elements_base_instance2_idx_stride(
5033 buffer,
5034 program->index_stride);
5035 pvr_write_draw_indirect_elements_base_instance2_idx_base(
5036 buffer,
5037 program->index_buffer);
5038 pvr_write_draw_indirect_elements_base_instance2_idx_header(
5039 buffer,
5040 program->index_block_header);
5041 pvr_write_draw_indirect_elements_base_instance2_immediates(
5042 buffer);
5043 }
5044 } else {
5045 pvr_write_draw_indirect_elements2_di_data(buffer,
5046 program->arg_buffer &
5047 ~0xfull,
5048 dev_info);
5049 pvr_write_draw_indirect_elements2_write_vdm(
5050 buffer,
5051 program->index_list_addr_buffer);
5052 pvr_write_draw_indirect_elements2_flush_vdm(
5053 buffer,
5054 program->index_list_addr_buffer);
5055 pvr_write_draw_indirect_elements2_num_views(buffer,
5056 program->num_views);
5057 pvr_write_draw_indirect_elements2_idx_stride(buffer,
5058 program->index_stride);
5059 pvr_write_draw_indirect_elements2_idx_base(buffer,
5060 program->index_buffer);
5061 pvr_write_draw_indirect_elements2_idx_header(
5062 buffer,
5063 program->index_block_header);
5064 pvr_write_draw_indirect_elements2_immediates(buffer);
5065 }
5066 break;
5067 case 3:
5068 if (program->support_base_instance) {
5069 if (program->increment_draw_id) {
5070 pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
5071 buffer,
5072 program->arg_buffer & ~0xfull,
5073 dev_info);
5074 pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
5075 buffer,
5076 program->index_list_addr_buffer);
5077 pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
5078 buffer,
5079 program->index_list_addr_buffer);
5080 pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
5081 buffer,
5082 program->num_views);
5083 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
5084 buffer,
5085 program->index_stride);
5086 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
5087 buffer,
5088 program->index_buffer);
5089 pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
5090 buffer,
5091 program->index_block_header);
5092 pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
5093 buffer);
5094 } else {
5095 pvr_write_draw_indirect_elements_base_instance3_di_data(
5096 buffer,
5097 program->arg_buffer & ~0xfull,
5098 dev_info);
5099 pvr_write_draw_indirect_elements_base_instance3_write_vdm(
5100 buffer,
5101 program->index_list_addr_buffer);
5102 pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
5103 buffer,
5104 program->index_list_addr_buffer);
5105 pvr_write_draw_indirect_elements_base_instance3_num_views(
5106 buffer,
5107 program->num_views);
5108 pvr_write_draw_indirect_elements_base_instance3_idx_stride(
5109 buffer,
5110 program->index_stride);
5111 pvr_write_draw_indirect_elements_base_instance3_idx_base(
5112 buffer,
5113 program->index_buffer);
5114 pvr_write_draw_indirect_elements_base_instance3_idx_header(
5115 buffer,
5116 program->index_block_header);
5117 pvr_write_draw_indirect_elements_base_instance3_immediates(
5118 buffer);
5119 }
5120 } else {
5121 pvr_write_draw_indirect_elements3_di_data(buffer,
5122 program->arg_buffer &
5123 ~0xfull,
5124 dev_info);
5125 pvr_write_draw_indirect_elements3_write_vdm(
5126 buffer,
5127 program->index_list_addr_buffer);
5128 pvr_write_draw_indirect_elements3_flush_vdm(
5129 buffer,
5130 program->index_list_addr_buffer);
5131 pvr_write_draw_indirect_elements3_num_views(buffer,
5132 program->num_views);
5133 pvr_write_draw_indirect_elements3_idx_stride(buffer,
5134 program->index_stride);
5135 pvr_write_draw_indirect_elements3_idx_base(buffer,
5136 program->index_buffer);
5137 pvr_write_draw_indirect_elements3_idx_header(
5138 buffer,
5139 program->index_block_header);
5140 pvr_write_draw_indirect_elements3_immediates(buffer);
5141 }
5142 break;
5143 }
5144 }
5145 }
5146