• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef PVR_PDS_H
25 #define PVR_PDS_H
26 
27 #include <stdbool.h>
28 
29 #include "pvr_device_info.h"
30 #include "pvr_limits.h"
31 #include "pds/pvr_rogue_pds_defs.h"
32 #include "util/macros.h"
33 
/* C++ has no `restrict` keyword: map it to the `__restrict__` spelling so the
 * prototypes in this header still parse when it is included from C++.
 */
#if defined(__cplusplus)
#   define restrict __restrict__
#endif
37 
38 /*****************************************************************************
39  Macro definitions
40 *****************************************************************************/
41 
/* Dword DOUTW budget: maximum number of passes that may emit DOUTW multiplied
 * by the maximum number that might be emitted.
 */
#define PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW 6

/* Qword DOUTW budget: maximum number of passes that may emit DOUTW multiplied
 * by the maximum number that might be emitted.
 */
#define PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW 3

/* DMA kick limit, based on max(max(UBOs, cbuffers), numTextures). */
#define PVR_PDS_MAX_NUM_DMA_KICKS 32

/* Array bounds used by the vertex stream / coefficient loading structures. */
#define PVR_PDS_NUM_VERTEX_STREAMS 32
#define PVR_PDS_NUM_VERTEX_ELEMENTS 32
#define PVR_MAXIMUM_ITERATIONS 128

/* Number of compute input registers per register group. */
#define PVR_PDS_NUM_COMPUTE_INPUT_REGS 3
57 
/* Evaluates to true when the device has the compute_morton_capable feature
 * but does not have ERN 45493.
 *
 * The whole expansion is parenthesised so that the macro acts as a single
 * operand: without the outer parentheses `!PVR_NEED_SW_COMPUTE_PDS_BARRIER(x)`
 * would negate only the feature check.
 */
#define PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)        \
   (PVR_HAS_FEATURE(dev_info, compute_morton_capable) && \
    !PVR_HAS_ERN(dev_info, 45493))
61 
/* FIXME: Change BIL to SPV. */
/* A variable location holds at most four 32-bit components. */
#define BIL_COMPONENTS_PER_LOCATION 4

/* Upper bound on the number of DDMADs that may be performed:
 * number of attributes multiplied by the number of DMAs per attribute.
 */
#define PVR_MAX_VERTEX_ATTRIB_DMAS \
   (PVR_MAX_VERTEX_INPUT_BINDINGS * BIL_COMPONENTS_PER_LOCATION)
71 
72 /*****************************************************************************
73  Typedefs
74 *****************************************************************************/
75 
/* 32-bit wide boolean type.
 * FIXME: We might need to change some bools to this.
 */
typedef uint32_t PVR_PDS_BOOL;
78 
79 /*****************************************************************************
80  Enums
81 *****************************************************************************/
82 
/* Selects what a PDS generator call is asked to produce. The wrapper macros
 * in this header pass a NULL buffer together with PDS_GENERATE_SIZES.
 */
enum pvr_pds_generate_mode {
   PDS_GENERATE_SIZES,
   PDS_GENERATE_CODE_SEGMENT,
   PDS_GENERATE_DATA_SEGMENT,
   PDS_GENERATE_CODEDATA_SEGMENTS,
};
89 
/* Destination store selector (used as pvr_pds_doutw_control::dest_store). */
enum pvr_pds_store_type {
   PDS_COMMON_STORE,
   PDS_UNIFIED_STORE,
};
91 
/* Kinds of PDS vertex attribute program. The final enumerator is not a
 * program type; it is the number of types that precede it.
 */
enum pvr_pds_vertex_attrib_program_type {
   PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC,
   PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE,
   PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT,

   PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT,
};
98 
99 /*****************************************************************************
100  Structure definitions
101 *****************************************************************************/
102 
/* Register description used by the draw-indirect program
 * (see pvr_pds_drawindirect_program::data and pvr_psc_program_output::data).
 */
struct pvr_psc_register {
   uint32_t num;

   /* Size of each element. */
   unsigned size;
   /* Max number of elements (4-bit field). */
   unsigned dim : 4;
   /* Offset into array. */
   unsigned index;

   unsigned cast;

   unsigned type;
   uint64_t name;
   bool auto_assign;
   unsigned original_type;
};
117 
/* Output of a program compilation
 * (stored in pvr_pds_drawindirect_program::program).
 */
struct pvr_psc_program_output {
   const uint32_t *code;

   /* Register array and the number of entries in it. */
   struct pvr_psc_register *data;
   unsigned data_count;

   /* Aligned sizes of the data, code and temp sections. */
   unsigned data_size_aligned;
   unsigned code_size_aligned;
   unsigned temp_size_aligned;

   /* Sizes of the data, code and temp sections. */
   unsigned data_size;
   unsigned code_size;
   unsigned temp_size;

   /* Callback taking an opaque data pointer and an output buffer. */
   void (*write_data)(void *data, uint32_t *buffer);
};
134 
/* USC task control state: a single 64-bit src0 word. */
struct pvr_pds_usc_task_control {
   uint64_t src0;
};
138 
139 /* Up to 4 64-bit state words currently supported. */
140 #define PVR_PDS_MAX_NUM_DOUTW_CONSTANTS 4
141 
142 /* Structure for DOUTW. */
143 struct pvr_pds_doutw_control {
144    enum pvr_pds_store_type dest_store;
145    uint32_t num_const64;
146    uint64_t doutw_data[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
147    bool last_instruction;
148 
149    uint32_t *data_segment;
150    uint32_t data_size;
151    uint32_t code_size;
152 };
153 
154 /* Structure representing the PDS pixel event program.
155  *
156  * data_segment - pointer to the data segment
157  * task_control - USC task control words
158  * emit_words - array of Emit words
159  * data_size - size of data segment
160  * code_size - size of code segment
161  */
162 struct pvr_pds_event_program {
163    uint32_t *data_segment;
164    struct pvr_pds_usc_task_control task_control;
165 
166    uint32_t num_emit_word_pairs;
167    uint32_t *emit_words;
168 
169    uint32_t data_size;
170    uint32_t code_size;
171 };
172 
173 /*
174  * Structure representing the PDS pixel shader secondary attribute program.
175  *
176  * data_segment - pointer to the data segment
177  *
178  * num_uniform_dma_kicks - number of Uniform DMA kicks
179  * uniform_dma_control - array of Uniform DMA control words
180  * uniform_dma_address - array of Uniform DMA address words
181  *
182  * num_texture_dma_kicks - number of Texture State DMA kicks
183  * texture_dma_control - array of Texture State DMA control words
184  * texture_dma_address - array of Texture State DMA address words
185  *
186  * data_size - size of data segment
187  * code_size - size of code segment
188  *
189  * temps_used - PDS Temps
190  */
191 struct pvr_pds_pixel_shader_sa_program {
192    uint32_t *data_segment;
193 
194    uint32_t num_dword_doutw;
195    uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
196    uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
197 
198    uint32_t num_q_word_doutw;
199    uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
200    uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
201 
202    uint32_t num_uniform_dma_kicks;
203    uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
204    uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
205 
206    uint32_t num_texture_dma_kicks;
207    uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
208    uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
209 
210    bool kick_usc;
211    bool write_tile_position;
212    uint32_t tile_position_attr_dest;
213    struct pvr_pds_usc_task_control usc_task_control;
214 
215    bool clear;
216    uint32_t *clear_color;
217    uint32_t clear_color_dest_reg;
218    bool packed_clear;
219 
220    uint32_t data_size;
221    uint32_t code_size;
222 
223    uint32_t temps_used;
224 };
225 
226 /* Structure representing the PDS pixel shader program.
227  *
228  * data_segment - pointer to the data segment
229  * usc_task_control - array of USC task control words
230  *
231  * data_size - size of data segment
232  * code_size - size of code segment
233  */
234 struct pvr_pds_kickusc_program {
235    uint32_t *data_segment;
236    struct pvr_pds_usc_task_control usc_task_control;
237 
238    uint32_t data_size;
239    uint32_t code_size;
240 };
241 
/* PDS fence/doutc program description. */
struct pvr_pds_fence_program {
   /* Pointer to the data segment. */
   uint32_t *data_segment;
   uint32_t fence_constant_word;
   /* Size of the data segment. */
   uint32_t data_size;
   /* Size of the code segment. */
   uint32_t code_size;
};
254 
255 /* Structure representing the PDS coefficient loading.
256  *
257  * data_segment - pointer to the data segment
258  * num_fpu_iterators - number of FPU iterators
259  * FPU_iterators - array of FPU iterator control words
260  * destination - array of Common Store destinations
261  *
262  * data_size - size of data segment
263  * code_size - size of code segment
264  */
265 struct pvr_pds_coeff_loading_program {
266    uint32_t *data_segment;
267    uint32_t num_fpu_iterators;
268    uint32_t FPU_iterators[PVR_MAXIMUM_ITERATIONS];
269    uint32_t destination[PVR_MAXIMUM_ITERATIONS];
270 
271    uint32_t data_size;
272    uint32_t code_size;
273 
274    uint32_t temps_used;
275 };
276 
277 /* Structure representing the PDS vertex shader secondary attribute program.
278  *
279  * data_segment - pointer to the data segment
280  * num_dma_kicks - number of DMA kicks
281  * dma_control - array of DMA control words
282  * dma_address - array of DMA address words
283  *
284  * data_size - size of data segment
285  * code_size - size of code segment
286  */
287 struct pvr_pds_vertex_shader_sa_program {
288    uint32_t *data_segment;
289 
290    /* num_uniform_dma_kicks, uniform_dma_address, uniform_dma_control, are not
291     * used for generating PDS data section and code section, they are currently
292     * only used to simpler the driver implementation. The driver should correct
293     * these information into num_dma_kicks, dma_address and dma_control to get
294     * the PDS properly generated.
295     */
296 
297    uint32_t num_dword_doutw;
298    uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
299    uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
300 
301    uint32_t num_q_word_doutw;
302    uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
303    uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
304 
305    uint32_t num_uniform_dma_kicks;
306    uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
307    uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
308 
309    uint32_t num_texture_dma_kicks;
310    uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
311    uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
312 
313    uint32_t num_dma_kicks;
314    uint64_t dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
315    uint32_t dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
316 
317    bool kick_usc;
318    struct pvr_pds_usc_task_control usc_task_control;
319 
320    /* Shared register buffer base address (VDM/CDM context load case only). */
321    bool clear_pds_barrier;
322 
323    uint32_t data_size;
324    uint32_t code_size;
325 };
326 
/* One element of a PDS vertex stream.
 *
 * An element is either a non-repeat DMA or a repeat DMA; in both cases HW
 * registers are written contiguously.
 *
 * - Non-repeat DMA (component_size == 0): the classic DMA of some number of
 *   bytes from an offset into contiguous registers. The address and size are
 *   assumed to be dword aligned. Each four bytes read go to the next HW
 *   register.
 *
 * - Repeat DMA (component_size of 1, 2, 3 or 4 bytes): allows copying sub
 *   dword amounts at non dword aligned addresses. `size` gives the number of
 *   components and each component read goes to the next HW register.
 */
struct pvr_pds_vertex_element {
   /* Offset of the vertex stream element. */
   uint32_t offset;
   /* Size in bytes for non-repeat DMA, or number of components for repeat
    * DMA.
    */
   uint32_t size;
   /* First vertex stream element register to DMA to. */
   uint16_t reg;
   /* Component size for repeat DMA, or 0 for non-repeat DMA. */
   uint16_t component_size;
};
355 
356 /* Structure representing a PDS vertex stream.
357  *
358  * instance_data - flag whether the vertex stream is indexed or instance data
359  * read_back - If True, vertex is reading back data output by GPU earlier in
360  *             same kick. This will enable MCU coherency if relevant.
361  * multiplier - vertex stream frequency multiplier
362  * shift - vertex stream frequency shift
363  * address - vertex stream address in bytes
364  * buffer_size_in_bytes - buffer size in bytes if vertex attribute is sourced
365  *                        from buffer object
366  * stride - vertex stream stride in bytes
367  * num_vertices - number of vertices in buffer. Used for OOB checking.
368                 - 0 = disable oob checking.
369  * num_elements - number of vertex stream elements
370  * elements - array of vertex stream elements
371  * use_ddmadt - When the has_pds_ddmadt feature is enabled. Boolean allowing
372  *              DDMADT to be use per stream element.
373  */
374 struct pvr_pds_vertex_stream {
375    bool current_state;
376    bool instance_data;
377    bool read_back;
378    uint32_t multiplier;
379    uint32_t shift;
380    uint64_t address;
381    uint32_t buffer_size_in_bytes;
382    uint32_t stride;
383    uint32_t num_vertices;
384    uint32_t num_elements;
385    struct pvr_pds_vertex_element elements[PVR_PDS_NUM_VERTEX_ELEMENTS];
386 
387    bool use_ddmadt;
388 };
389 
390 /* Structure representing the PDS vertex shader program.
391  *
392  * This structure describes the USC code and vertex buffers required
393  * by the PDS vertex loading program.
394  *
395  * data_segment - Pointer to the data segment.
396  * usc_task_control - Description of USC task for vertex shader program.
397  * num_streams - Number of vertex streams.
398  * iterate_vtx_id - If set, the vertex id should be iterated.
399  * vtx_id_register - The register to iterate the VertexID into (if applicable)
400  * vtx_id_modifier - Value to pvr_add/SUB from index value received by PDS.
401  *                   This is used because the index value received by PDS has
402  *                   INDEX_OFFSET added, and generally VertexID wouldn't.
403  * vtx_id_sub_modifier - If true, vtx_id_modifier is subtracted, else added.
404  * iterate_instance_id - If set, the instance id should be iterated.
405  * instance_id_register - The register to iterate the InstanceID into (if
406  *                        applicable). The vertex and instance id will both be
407  *                        iterated as unsigned ints
408  *
409  * iterate_remap_id - Should be set to true if vertex shader needs
410  *                    VS_REMAPPED_INDEX_ID (e.g. Another TA shader runs after
411  *                    it).
412  * null_idx - Indicates no index buffer is bound, so every index should be
413  *            null_idx_value.
414  * null_idx_value - The value to use as index if null_idx set.
415  * data_size - Size of data segment, in dwords. Output by call to
416  *             pvr_pds_vertex_shader, and used as input when generating data.
417  * code_size - Size of code segment. Output by call to pvr_pds_vertex_shader.
418  *             This is the number of dword instructions that are/were generated.
419  * temps_used - Number of temporaries used. Output by call to
420  *              pvr_pds_vertex_shader.
421  */
422 struct pvr_pds_vertex_shader_program {
423    uint32_t *data_segment;
424    struct pvr_pds_usc_task_control usc_task_control;
425    uint32_t num_streams;
426 
427    bool iterate_vtx_id;
428    uint32_t vtx_id_register;
429    uint32_t vtx_id_modifier;
430    bool vtx_id_sub_modifier;
431 
432    bool iterate_instance_id;
433    uint32_t instance_id_register;
434    uint32_t instance_ID_modifier;
435    uint32_t base_instance;
436 
437    bool iterate_remap_id;
438 
439    bool null_idx;
440    uint32_t null_idx_value;
441 
442    uint32_t *stream_patch_offsets;
443    uint32_t num_stream_patches;
444 
445    uint32_t data_size;
446    uint32_t code_size;
447    uint32_t temps_used;
448    uint32_t ddmadt_enables;
449    uint32_t skip_stream_flag;
450 
451    bool draw_indirect;
452    bool indexed;
453 
454    struct pvr_pds_vertex_stream streams[PVR_PDS_NUM_VERTEX_STREAMS];
455 };
456 
457 /* Structure representing PDS shared reg storing program. */
458 struct pvr_pds_shared_storing_program {
459    struct pvr_pds_doutw_control doutw_control; /*!< DOUTW state */
460    struct pvr_pds_kickusc_program usc_task; /*!< DOUTU state */
461    bool cc_enable; /*!< cc bit is set on the doutu instruction. */
462    uint32_t data_size; /*!< total data size, non-aligned. */
463    uint32_t code_size; /*!< total code size, non-aligned. */
464 };
465 
#define PVR_MAX_STREAMOUT_BUFFERS 4

/* Stream out init PDS programs description. */
struct pvr_pds_stream_out_init_program {
   /* --- Inputs to the stream out init program --- */

   /* Number of buffers to load/store, i.e. the number of valid entries in
    * the two arrays below. Data is loaded/stored contiguously to persistent
    * temps.
    */
   uint32_t num_buffers;

   /* Per buffer: number of persistent temps, in dwords, to load/store. */
   uint32_t pds_buffer_data_size[PVR_MAX_STREAMOUT_BUFFERS];

   /* Per buffer: device address for loading/storing persistent temps. A zero
    * address means no data is loaded/stored into pt registers for that
    * buffer.
    */
   uint64_t dev_address_for_buffer_data[PVR_MAX_STREAMOUT_BUFFERS];

   /* PDS state update Stream Out Init Programs. */
   uint32_t stream_out_init_pds_data_size;
   uint32_t stream_out_init_pds_code_size;
};
490 
/* Stream out terminate PDS program description. */
struct pvr_pds_stream_out_terminate_program {
   /* Input: number of persistent temps, in dwords, used by the stream out PDS
    * programs that need to be stored. The terminate program writes this many
    * persistent temps to dev_address_for_storing_persistent_temp.
    */
   uint32_t pds_persistent_temp_size_to_store;

   /* Device address the persistent temps are stored to. */
   uint64_t dev_address_for_storing_persistent_temp;

   /* PPP state update Stream Out Program for stream out terminate. */
   uint32_t stream_out_terminate_pds_data_size;
   uint32_t stream_out_terminate_pds_code_size;
};
509 
510 /*  Structure representing the PDS compute shader program.
511  *	This structure describes the USC code and compute buffers required
512  *	by the PDS compute task loading program
513  *
514  *	data_segment
515  *		pointer to the data segment
516  *	usc_task_control
517  *		Description of USC task for compute shader program.
518  *	data_size
519  *		Size of data segment, in dwords.
520  *		Output by call to pvr_pds_compute_shader, and used as input when
521  *   generating data. code_size Size of code segment. Output by call to
522  *   pvr_pds_compute_shader. This is the number of dword instructions that
523  *   are/were generated. temps_used Number of temporaries used. Output by call
524  *to pvr_pds_compute_shader. highest_temp The highest temp number used. Output
525  *by call to pvr_pds_compute_shader coeff_update_task_branch_size The number of
526  *   instructions we need to branch over to skip the coefficient update task.
527  */
528 
529 struct pvr_pds_compute_shader_program {
530    uint32_t *data_segment;
531    struct pvr_pds_usc_task_control usc_task_control;
532    struct pvr_pds_usc_task_control usc_task_control_coeff_update;
533 
534    uint32_t data_size;
535    uint32_t code_size;
536 
537    uint32_t temps_used;
538    uint32_t highest_temp;
539 
540    uint32_t local_input_regs[3];
541    uint32_t work_group_input_regs[3];
542    uint32_t global_input_regs[3];
543 
544    uint32_t barrier_coefficient;
545 
546    bool fence;
547 
548    bool flattened_work_groups;
549 
550    bool clear_pds_barrier;
551 
552    bool has_coefficient_update_task;
553 
554    uint32_t coeff_update_task_branch_size;
555 
556    bool add_base_workgroup;
557    uint32_t base_workgroup_constant_offset_in_dwords[3];
558 
559    bool kick_usc;
560 
561    bool conditional_render;
562    uint32_t cond_render_const_offset_in_dwords;
563    uint32_t cond_render_pred_temp;
564 };
/* Control state passed to pvr_pds_generate_single_ldst_instruction(): a single
 * 64-bit cache control constant.
 */
struct pvr_pds_ldst_control {
   uint64_t cache_control_const;
};
568 
/* Sentinel register number the driver can use to mark a compute input
 * register as unused.
 */
#define PVR_PDS_COMPUTE_INPUT_REG_UNUSED 0xFFFFFFFFU
573 
574 /*****************************************************************************
575  function declarations
576 *****************************************************************************/
577 
578 /*****************************************************************************
579  Constructors
580 *****************************************************************************/
581 
/* Initialisation entry point for a pixel shader secondary attribute program
 * descriptor.
 */
void pvr_pds_pixel_shader_sa_initialize(
   struct pvr_pds_pixel_shader_sa_program *program);

/* Initialisation entry point for a compute shader program descriptor. */
void pvr_pds_compute_shader_initialize(
   struct pvr_pds_compute_shader_program *program);
586 
/* Utility */

/* Takes a constants buffer, a 64-bit constant value and an in/out data size.
 * NOTE(review): presumably returns the location assigned to the constant;
 * confirm against the implementation.
 */
uint32_t
pvr_pds_append_constant64(uint32_t *constants,
                          uint64_t constant_value,
                          uint32_t *data_size);

/* Takes output arrays for DMA control/address words plus the destination
 * offset, DMA size and source address of the burst.
 * NOTE(review): the meaning of the return value is not visible from this
 * header; confirm against the implementation.
 */
uint32_t
pvr_pds_encode_dma_burst(uint32_t *dma_control,
                         uint64_t *dma_address,
                         uint32_t dest_offset,
                         uint32_t dma_size,
                         uint64_t src_address,
                         const struct pvr_device_info *dev_info);

/* Operates on the given USC task control from an execution address, USC temp
 * count, sample rate and phase rate change flag.
 */
void
pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
                    uint64_t execution_address,
                    uint32_t usc_temps,
                    uint32_t sample_rate,
                    bool phase_rate_change);
605 
/* Pixel */

/* Size-only pass of pvr_pds_kick_usc(): no output buffer is supplied. */
#define pvr_pds_set_sizes_pixel_shader(program) \
   pvr_pds_kick_usc(program, NULL, 0, false, PDS_GENERATE_SIZES)

/* Code + data generation pass of pvr_pds_kick_usc() into `buffer`. */
#define pvr_pds_generate_pixel_shader_program(program, buffer) \
   pvr_pds_kick_usc(program, buffer, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS)

/* Expands to the same pvr_pds_kick_usc() call as
 * pvr_pds_generate_pixel_shader_program().
 */
#define pvr_pds_generate_VDM_sync_program(program, buffer) \
   pvr_pds_kick_usc(program, buffer, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS)

/* Generator for the fence/doutc program described by `program`. */
uint32_t *
pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
                       uint32_t *restrict buffer,
                       enum pvr_pds_generate_mode gen_mode);

/* Generator for the DOUTW described by `psControl`. */
uint32_t *
pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict psControl,
                       uint32_t *restrict buffer,
                       enum pvr_pds_generate_mode gen_mode,
                       const struct pvr_device_info *dev_info);

/* Generator for the USC kick program described by `program`. */
uint32_t *
pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
                 uint32_t *restrict buffer,
                 uint32_t start_next_constant,
                 bool cc_enabled,
                 enum pvr_pds_generate_mode gen_mode);
630 
/* Pixel Secondary */

/* Size-only pass of the data generator with `uniform` set to true. */
#define pvr_pds_set_sizes_pixel_shader_sa_uniform_data(program, dev_info) \
   pvr_pds_pixel_shader_uniform_texture_data(program,                     \
                                             NULL,                        \
                                             PDS_GENERATE_SIZES,          \
                                             true,                        \
                                             dev_info)

/* Size-only pass of the data generator with `uniform` set to false. */
#define pvr_pds_set_sizes_pixel_shader_sa_texture_data(program, dev_info) \
   pvr_pds_pixel_shader_uniform_texture_data(program,                     \
                                             NULL,                        \
                                             PDS_GENERATE_SIZES,          \
                                             false,                       \
                                             dev_info)

/* Size-only pass of the code generator. */
#define pvr_pds_set_sizes_pixel_shader_uniform_texture_code(program) \
   pvr_pds_pixel_shader_uniform_texture_code(program, NULL, PDS_GENERATE_SIZES)

/* Data segment pass of the data generator with `uniform` set to false. */
#define pvr_pds_generate_pixel_shader_sa_texture_state_data(program,    \
                                                            buffer,     \
                                                            dev_info)   \
   pvr_pds_pixel_shader_uniform_texture_data(program,                   \
                                             buffer,                    \
                                             PDS_GENERATE_DATA_SEGMENT, \
                                             false,                     \
                                             dev_info)

/* Code segment pass of the code generator. */
#define pvr_pds_generate_pixel_shader_sa_code_segment(program, buffer) \
   pvr_pds_pixel_shader_uniform_texture_code(program,                  \
                                             buffer,                   \
                                             PDS_GENERATE_CODE_SEGMENT)

/* Data generator for the pixel shader secondary attribute program. */
uint32_t *
pvr_pds_pixel_shader_uniform_texture_data(
   struct pvr_pds_pixel_shader_sa_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   bool uniform,
   const struct pvr_device_info *dev_info);

/* Code generator for the pixel shader secondary attribute program. */
uint32_t *
pvr_pds_pixel_shader_uniform_texture_code(
   struct pvr_pds_pixel_shader_sa_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode);
668 
/* Vertex */

/* Size-only pass: no output buffer is supplied. */
#define pvr_pds_set_sizes_vertex_shader(program, dev_info) \
   pvr_pds_vertex_shader(program, NULL, PDS_GENERATE_SIZES, dev_info)

/* Data segment pass into `buffer`. */
#define pvr_pds_generate_vertex_shader_data_segment(program, buffer, dev_info) \
   pvr_pds_vertex_shader(program, buffer, PDS_GENERATE_DATA_SEGMENT, dev_info)

/* Code segment pass into `buffer`. */
#define pvr_pds_generate_vertex_shader_code_segment(program, buffer, dev_info) \
   pvr_pds_vertex_shader(program, buffer, PDS_GENERATE_CODE_SEGMENT, dev_info)

/* Generator for the PDS vertex shader program. Per the documentation on
 * struct pvr_pds_vertex_shader_program, a call outputs data_size, code_size
 * and temps_used in `program`.
 */
uint32_t *pvr_pds_vertex_shader(
   struct pvr_pds_vertex_shader_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
684 
/* Compute */

/* Generator for the PDS compute shader program described by `program`. */
uint32_t *pvr_pds_compute_shader(
   struct pvr_pds_compute_shader_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);

/* Size-only pass: no output buffer is supplied. */
#define pvr_pds_set_sizes_compute_shader(program, dev_info) \
   pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info)

/* Data segment pass into `buffer`. */
#define pvr_pds_generate_compute_shader_data_segment(program,  \
                                                     buffer,   \
                                                     dev_info) \
   pvr_pds_compute_shader(program, buffer, PDS_GENERATE_DATA_SEGMENT, dev_info)

/* Code segment pass into `buffer`. */
#define pvr_pds_generate_compute_shader_code_segment(program,  \
                                                     buffer,   \
                                                     dev_info) \
   pvr_pds_compute_shader(program, buffer, PDS_GENERATE_CODE_SEGMENT, dev_info)
700 
/* Vertex Secondary */

/* Size-only pass: no output buffer is supplied. */
#define pvr_pds_set_sizes_vertex_shader_sa(program, dev_info) \
   pvr_pds_vertex_shader_sa(program, NULL, PDS_GENERATE_SIZES, dev_info)

/* Data segment pass into `buffer`. */
#define pvr_pds_generate_vertex_shader_sa_data_segment(program,  \
                                                       buffer,   \
                                                       dev_info) \
   pvr_pds_vertex_shader_sa(program,                             \
                            buffer,                              \
                            PDS_GENERATE_DATA_SEGMENT,           \
                            dev_info)

/* Code segment pass into `buffer`. */
#define pvr_pds_generate_vertex_shader_sa_code_segment(program,  \
                                                       buffer,   \
                                                       dev_info) \
   pvr_pds_vertex_shader_sa(program,                             \
                            buffer,                              \
                            PDS_GENERATE_CODE_SEGMENT,           \
                            dev_info)

/* Generator for the PDS vertex shader secondary attribute program. */
uint32_t *
pvr_pds_vertex_shader_sa(
   struct pvr_pds_vertex_shader_sa_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
716 
/* Pixel Event */

/* Size-only pass: no output buffer is supplied. */
#define pvr_pds_set_sizes_pixel_event(program, dev_info) \
   pvr_pds_generate_pixel_event(program, NULL, PDS_GENERATE_SIZES, dev_info)

/* Data segment pass into `buffer`. */
#define pvr_pds_generate_pixel_event_data_segment(program, buffer, dev_info) \
   pvr_pds_generate_pixel_event(program,                                     \
                                buffer,                                      \
                                PDS_GENERATE_DATA_SEGMENT,                   \
                                dev_info)

/* Code segment pass into `buffer`. */
#define pvr_pds_generate_pixel_event_code_segment(program, buffer, dev_info) \
   pvr_pds_generate_pixel_event(program,                                     \
                                buffer,                                      \
                                PDS_GENERATE_CODE_SEGMENT,                   \
                                dev_info)

/* Generator for the PDS pixel event program described by `program`. */
uint32_t *pvr_pds_generate_pixel_event(
   struct pvr_pds_event_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
732 
/* Coefficient Loading */

/* Size-only pass: no output buffer is supplied. */
#define pvr_pds_set_sizes_coeff_loading(program) \
   pvr_pds_coefficient_loading(program, NULL, PDS_GENERATE_SIZES)

/* Code segment pass into `buffer`. */
#define pvr_pds_generate_coeff_loading_program(program, buffer) \
   pvr_pds_coefficient_loading(program, buffer, PDS_GENERATE_CODE_SEGMENT)

/* Generator for the PDS coefficient loading program. */
uint32_t *
pvr_pds_coefficient_loading(
   struct pvr_pds_coeff_loading_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode);
744 
/* Generator for the compute DM barrier-specific conditional code. */
uint32_t *
pvr_pds_generate_compute_barrier_conditional(
   uint32_t *buffer,
   enum pvr_pds_generate_mode gen_mode);
749 
/* Shared register storing */
uint32_t *
pvr_pds_generate_shared_storing_program(
   struct pvr_pds_shared_storing_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);

/* Shared register loading */
uint32_t *
pvr_pds_generate_fence_terminate_program(
   struct pvr_pds_fence_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);

/* CDM shared register loading. Takes the same program descriptor type as the
 * shared register storing generator.
 */
uint32_t *
pvr_pds_generate_compute_shared_loading_program(
   struct pvr_pds_shared_storing_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
770 
/* Stream out */

/* Generator for the stream out init program; `store_mode` is an extra boolean
 * input alongside the usual program/buffer/mode/device arguments.
 */
uint32_t *
pvr_pds_generate_stream_out_init_program(
   struct pvr_pds_stream_out_init_program *restrict program,
   uint32_t *restrict buffer,
   bool store_mode,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);

/* Generator for the stream out terminate program. */
uint32_t *
pvr_pds_generate_stream_out_terminate_program(
   struct pvr_pds_stream_out_terminate_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
784 
/* Structure representing DrawIndirect PDS programs.
 *
 * This is the program type accepted by
 * pvr_pds_generate_draw_arrays_indirect() and
 * pvr_pds_generate_draw_elements_indirect().
 */
struct pvr_pds_drawindirect_program {
   /* --- Input to pvr_pds_drawindirect_program --- */

   /* Address of the index list block in the VDM control stream.
    * This must point to a 128-bit aligned index list header.
    */
   uint64_t index_list_addr_buffer;
   /* Address of arguments for Draw call. Layout is defined by eArgFormat.
    * NOTE(review): no eArgFormat member is visible in this structure; the
    * reference may be stale - confirm.
    */
   uint64_t arg_buffer;

   /* Address of index buffer. */
   uint64_t index_buffer;

   /* The raw (without addr msb in [7:0]) index block header. */
   uint32_t index_block_header;

   /* Number of bytes per index. */
   uint32_t index_stride;

   /* Used during/after compilation to fill in constant buffer. */
   struct pvr_psc_register data[32];

   /* Results of compilation. */
   struct pvr_psc_program_output program;

   /* This is used for ARB_multi_draw_indirect. */
   unsigned int count;
   unsigned int stride;

   /* Internal stuff. */
   unsigned int num_views;

   bool support_base_instance;
   bool increment_draw_id;
};
821 
/* DrawIndirect program generators (declarations only).
 *
 * Both variants take the same struct pvr_pds_drawindirect_program and return
 * nothing; the struct's own comments mark `data` and `program` as filled in
 * during/after compilation.
 */
void pvr_pds_generate_draw_arrays_indirect(
   struct pvr_pds_drawindirect_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
void pvr_pds_generate_draw_elements_indirect(
   struct pvr_pds_drawindirect_program *restrict program,
   uint32_t *restrict buffer,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
832 
/* Declarations only. Each returns a 64-bit value built from its arguments;
 * presumably the encoded SRC0 operand of a PDS ST instruction - confirm
 * against the implementation.
 * Note the count units differ by name between the two: `count4` here,
 * `count8` for the LD variant.
 */
uint64_t pvr_pds_encode_st_src0(uint64_t src,
                                uint64_t count4,
                                uint64_t dst_add,
                                bool write_through,
                                const struct pvr_device_info *dev_info);

/* LD counterpart of pvr_pds_encode_st_src0(); presumably encodes the SRC0
 * operand of a PDS LD instruction - confirm.
 */
uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
                                uint64_t count8,
                                uint64_t src_add,
                                bool cached,
                                const struct pvr_device_info *dev_info);
844 
/* Declaration only.
 *
 * `ld` presumably selects load (true) versus store (false) - confirm.
 * `next_constant`, `total_data_size` and `total_code_size` are passed as
 * non-const pointers, so the callee is able to update them; whether it does
 * is not visible from this header.
 * Unlike the other generators in this header, none of the pointer parameters
 * are restrict-qualified.
 */
uint32_t *pvr_pds_generate_single_ldst_instruction(
   bool ld,
   const struct pvr_pds_ldst_control *control,
   uint32_t temp_index,
   uint64_t address,
   uint32_t count,
   uint32_t *next_constant,
   uint32_t *total_data_size,
   uint32_t *total_code_size,
   uint32_t *buffer,
   bool data_fence,
   enum pvr_pds_generate_mode gen_mode,
   const struct pvr_device_info *dev_info);
/* One descriptor set transfer. struct pvr_descriptor_program_input holds an
 * array of these in its descriptor_sets member.
 */
struct pvr_pds_descriptor_set {
   unsigned int descriptor_set; /* id of the descriptor set. */
   unsigned int size_in_dwords; /* Number of dwords to transfer. */
   unsigned int destination; /* Destination shared register to which
                              * descriptor entries should be loaded.
                              */
   bool primary; /* Primary or secondary? */
   unsigned int offset_in_dwords; /* Offset from the start of the descriptor
                                   * set to start DMA'ing from.
                                   */
};
869 
/* Buffer type identifiers.
 *
 * The prefixes are mixed (PVR_BUFFER_TYPE_ and PVR_BUFFER_TYPES_); both
 * spellings belong to the same numbering.
 *
 * NOTE(review): PVR_BUFFER_TYPE_INVALID expands to the int value -1. Stored
 * in a narrower unsigned field such as the uint16_t pvr_pds_buffer::type it
 * truncates (to 0xFFFF), and a direct `field == PVR_BUFFER_TYPE_INVALID`
 * comparison then promotes the field to int and is never true. Confirm how
 * callers test for the invalid type.
 */
#define PVR_BUFFER_TYPE_UBO (0)
#define PVR_BUFFER_TYPES_COMPILE_TIME (1)
#define PVR_BUFFER_TYPE_BLEND_CONSTS (2)
#define PVR_BUFFER_TYPE_PUSH_CONSTS (3)
#define PVR_BUFFER_TYPES_BUFFER_LENGTHS (4)
#define PVR_BUFFER_TYPE_DYNAMIC (5)
#define PVR_BUFFER_TYPES_UBO_ZEROING (6)
#define PVR_BUFFER_TYPE_INVALID (~0)
878 
/* One "state" buffer entry of struct pvr_descriptor_program_input. */
struct pvr_pds_buffer {
   /* Presumably one of the PVR_BUFFER_TYPE* values - confirm. */
   uint16_t type;

   uint16_t size_in_dwords;
   uint32_t destination;

   /* Either a direct data pointer or a (buffer, set, binding, offset)
    * reference; the two overlap in storage. Which member is valid is not
    * recorded in the union itself - presumably it is selected by `type`;
    * confirm against the users.
    */
   union {
      uint32_t *data;
      struct {
         uint32_t buffer_id;
         uint16_t desc_set;
         uint16_t binding;
         uint32_t source_offset;
      };
   };
};
895 
/* Capacity of pvr_descriptor_program_input::buffers. */
#define PVR_PDS_MAX_BUFFERS (24)

/* Input accepted by pvr_pds_generate_descriptor_upload_program(). */
struct pvr_descriptor_program_input {
   /* User-specified descriptor sets. The array holds at most 8 entries;
    * descriptor_set_count is presumably the number in use - confirm.
    */
   unsigned int descriptor_set_count;
   struct pvr_pds_descriptor_set descriptor_sets[8];

   /* "State" buffers, including:
    * compile-time constants
    * blend constants
    * push constants
    * UBOs that have been hoisted.
    *
    * The array holds at most PVR_PDS_MAX_BUFFERS entries.
    */
   uint32_t buffer_count;
   struct pvr_pds_buffer buffers[PVR_PDS_MAX_BUFFERS];

   uint32_t blend_constants_used_mask;

   bool secondary_program_present;
   struct pvr_pds_usc_task_control secondary_task_control;

   bool must_not_be_empty;
};
919 
/* Single-bit flags (bits 0-6). pvr_pds_vertex_primary_program_input::flags
 * is documented as an ORed bitfield of PVR_PDS_VERTEX_FLAGS_*.
 */
#define PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED BITFIELD_BIT(0U)
#define PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED BITFIELD_BIT(1U)
#define PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT BITFIELD_BIT(2U)
#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT BITFIELD_BIT(3U)
#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED BITFIELD_BIT(4U)

/* BaseVertex is used in shader. */
#define PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED BITFIELD_BIT(5U)

#define PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED BITFIELD_BIT(6U)

/* Separate flag namespace from PVR_PDS_VERTEX_FLAGS_*; presumably stored in
 * pvr_pds_vertex_dma::flags - confirm.
 */
#define PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE BITFIELD_BIT(0U)
932 
/* Description of one vertex DMA. struct pvr_pds_vertex_primary_program_input
 * points at a list of these through its dma_list member.
 */
struct pvr_pds_vertex_dma {
   /* Try and keep this structure packing as small as possible: members are
    * ordered from widest to narrowest so that no padding is needed between
    * them. Keep new members grouped by width.
    */
   uint32_t divisor;

   uint16_t offset;
   uint16_t stride;
   uint16_t robustness_buffer_offset;

   /* Presumably PVR_PDS_VERTEX_DMA_FLAGS_* bits - confirm. */
   uint8_t flags;
   uint8_t size_in_dwords;
   uint8_t component_size_in_bytes;
   uint8_t destination;
   uint8_t binding_index;
};
947 
/* Input accepted by pvr_pds_generate_vertex_primary_program(). */
struct pvr_pds_vertex_primary_program_input {
   /* Control for the DOUTU that kicks the vertex USC shader. */
   struct pvr_pds_usc_task_control usc_task_control;
   /* List of DMAs (of size dma_count). */
   struct pvr_pds_vertex_dma *dma_list;
   uint32_t dma_count;

   /* ORd bitfield of PVR_PDS_VERTEX_FLAGS_* */
   uint32_t flags;

   /* Presumably only meaningful when the matching *_REQUIRED flag is set in
    * `flags` - confirm against the generator.
    */
   uint16_t vertex_id_register;
   uint16_t instance_id_register;

   /* API provided baseInstance (i.e. not from drawIndirect). */
   uint32_t base_instance;

   uint16_t base_instance_register;
   uint16_t base_vertex_register;
   uint16_t draw_index_register;
};
968 
/* Const map entry type identifiers. Each pvr_const_map_entry* structure
 * starts with a uint8_t `type` member; presumably it carries one of these
 * values - confirm.
 */
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_NULL (0)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64 (1)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32 (2)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET (3)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER (4)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER (5)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS (6)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS (7)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS (8)

/* The next two identifiers deliberately share the value 9: per their
 * comments, one applies when pds_ddmadt is enabled and the other when it is
 * not, so they cannot both be listed as cases of one switch statement.
 */

/* Use if pds_ddmadt is enabled. */
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE (9)

/* Use if pds_ddmadt is not enabled. */
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX (9)

#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE (10)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING (11)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX (12)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_WORKGROUP (13)
#define PVR_PDS_CONST_MAP_ENTRY_TYPE_COND_RENDER (14)
990 
/* We pack all the following structs tightly into a buffer using += sizeof(x)
 * offsets. This can lead to data that is not natively aligned. Supplying the
 * packed attribute indicates that unaligned accesses may be required, and the
 * aligned attribute causes the size of the structure to be aligned to a
 * specific boundary. With aligned(1) that boundary is a single byte, i.e. no
 * padding is added between or after members.
 */
#define PVR_ALIGNED __attribute__((packed, aligned(1)))
998 
/* Two-byte header (type, const_offset) that all the more specific const map
 * entry structures also begin with. pvr_pds_info::entries is declared as a
 * pointer to this type.
 */
struct pvr_const_map_entry {
   uint8_t type;
   uint8_t const_offset;
} PVR_ALIGNED;
1003 
/* Const map entry carrying a 32-bit literal. */
struct pvr_const_map_entry_literal32 {
   uint8_t type;
   uint8_t const_offset;

   /* Packed: starts at byte offset 2, so it is not 4-byte aligned. */
   uint32_t literal_value;
} PVR_ALIGNED;
1010 
/* Const map entry carrying a 64-bit literal. */
struct pvr_const_map_entry_literal64 {
   uint8_t type;
   uint8_t const_offset;

   /* Packed: starts at byte offset 2, so it is not 8-byte aligned. */
   uint64_t literal_value;
} PVR_ALIGNED;
1017 
/* Const map entry referring to a descriptor set. The payload member names
 * match descriptor_set, primary and offset_in_dwords of
 * struct pvr_pds_descriptor_set.
 */
struct pvr_const_map_entry_descriptor_set {
   uint8_t type;
   uint8_t const_offset;

   uint32_t descriptor_set;
   /* PVR_PDS_BOOL is defined outside this section; its width determines the
    * packed offset of the following member.
    */
   PVR_PDS_BOOL primary;
   uint32_t offset_in_dwords;
} PVR_ALIGNED;
1026 
/* Const map entry referring to a constant buffer. */
struct pvr_const_map_entry_constant_buffer {
   uint8_t type;
   uint8_t const_offset;

   /* NOTE(review): 16 bits here, while pvr_pds_buffer::buffer_id is 32 bits;
    * confirm that ids always fit.
    */
   uint16_t buffer_id;
   uint16_t desc_set;
   uint16_t binding;
   uint32_t offset;
   uint32_t size_in_dwords;
} PVR_ALIGNED;
1037 
/* Const map entry for constant buffer zeroing. Same payload as
 * struct pvr_const_map_entry_constant_buffer minus desc_set and binding.
 */
struct pvr_const_map_entry_constant_buffer_zeroing {
   uint8_t type;
   uint8_t const_offset;

   uint16_t buffer_id;
   uint32_t offset;
   uint32_t size_in_dwords;
} PVR_ALIGNED;
1046 
/* Const map entry referring to a special buffer. */
struct pvr_const_map_entry_special_buffer {
   uint8_t type;
   uint8_t const_offset;

   /* Presumably a PVR_BUFFER_TYPE* value - confirm. Only 8 bits wide. */
   uint8_t buffer_type;
   /* Packed: starts at byte offset 3. */
   uint32_t buffer_index;
} PVR_ALIGNED;
1054 
/* Const map entry carrying a 64-bit DOUTU control word. */
struct pvr_const_map_entry_doutu_address {
   uint8_t type;
   uint8_t const_offset;

   /* Packed: starts at byte offset 2, so it is not 8-byte aligned. */
   uint64_t doutu_control;
} PVR_ALIGNED;
1061 
/* Const map entry for a vertex attribute address. The payload member names
 * match a subset of struct pvr_pds_vertex_dma.
 */
struct pvr_const_map_entry_vertex_attribute_address {
   uint8_t type;
   uint8_t const_offset;

   uint16_t offset;
   uint16_t stride;
   uint8_t binding_index;
   uint8_t size_in_dwords;
} PVR_ALIGNED;
1071 
/* Robust variant of struct pvr_const_map_entry_vertex_attribute_address: the
 * same leading members followed by robustness_buffer_offset and
 * component_size_in_bytes.
 */
struct pvr_const_map_entry_robust_vertex_attribute_address {
   uint8_t type;
   uint8_t const_offset;

   uint16_t offset;
   uint16_t stride;
   uint8_t binding_index;
   uint8_t size_in_dwords;
   uint16_t robustness_buffer_offset;
   uint8_t component_size_in_bytes;
} PVR_ALIGNED;
1083 
/* Const map entry for a vertex attribute max index.
 * PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX is documented as
 * the type to use when pds_ddmadt is not enabled.
 * Note the member order differs from the vertex attribute address entries
 * (binding_index and size_in_dwords come first here).
 */
struct pvr_const_map_entry_vertex_attribute_max_index {
   uint8_t type;
   uint8_t const_offset;

   uint8_t binding_index;
   uint8_t size_in_dwords;
   uint16_t offset;
   uint16_t stride;
   uint8_t component_size_in_bytes;
} PVR_ALIGNED;
1094 
/* Const map entry for the base instance; carries no payload beyond the
 * common (type, const_offset) header.
 */
struct pvr_const_map_entry_base_instance {
   uint8_t type;
   uint8_t const_offset;
} PVR_ALIGNED;
1099 
/* Const map entry for the base vertex; carries no payload beyond the common
 * (type, const_offset) header.
 *
 * Marked PVR_ALIGNED like every other const map entry so the packing rule
 * stays attached to the type if members are ever added. With two uint8_t
 * members the size (2) and alignment (1) are unchanged.
 */
struct pvr_const_map_entry_base_vertex {
   uint8_t type;
   uint8_t const_offset;
} PVR_ALIGNED;
1104 
/* Const map entry for the base workgroup.
 * Note the tag uses a pvr_pds_ prefix, unlike the pvr_const_map_entry_*
 * structures declared before it.
 */
struct pvr_pds_const_map_entry_base_workgroup {
   uint8_t type;
   uint8_t const_offset;
   /* Presumably selects one component (e.g. x/y/z) - confirm. */
   uint8_t workgroup_component;
} PVR_ALIGNED;
1110 
/* Const map entry for the DDMADT out-of-bounds buffer size of a binding.
 * PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE is
 * documented as the type to use when pds_ddmadt is enabled.
 */
struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size {
   uint8_t type;
   uint8_t const_offset;
   uint8_t binding_index;
} PVR_ALIGNED;
1116 
/* Const map entry for conditional rendering. */
struct pvr_pds_const_map_entry_cond_render {
   uint8_t type;
   uint8_t const_offset;

   /* Packed: starts at byte offset 2, so it is not 4-byte aligned. */
   uint32_t cond_render_pred_temp;
} PVR_ALIGNED;
1123 
/* Information about a generated PDS program. Passed by pointer to
 * pvr_pds_generate_descriptor_upload_program() and
 * pvr_pds_generate_vertex_primary_program().
 */
struct pvr_pds_info {
   uint32_t temps_required;
   uint32_t code_size_in_dwords;
   uint32_t data_size_in_dwords;

   /* Const map entries. The entry structures have different sizes, so the
    * array cannot be indexed by entry number.
    * Presumably entries_size_in_bytes is the capacity of `entries` and
    * entries_written_size_in_bytes the portion filled in - confirm.
    */
   uint32_t entry_count;
   size_t entries_size_in_bytes;
   size_t entries_written_size_in_bytes;
   struct pvr_const_map_entry *entries;
};
1134 
/* Declarations only. Both generators take an input description, a code
 * pointer and a struct pvr_pds_info, and return nothing; `info` is non-const,
 * so results are presumably reported through it - confirm.
 * Unlike the vertex primary variant, the descriptor upload variant takes no
 * device info argument.
 */
void pvr_pds_generate_descriptor_upload_program(
   struct pvr_descriptor_program_input *input_program,
   uint32_t *code_section,
   struct pvr_pds_info *info);
void pvr_pds_generate_vertex_primary_program(
   struct pvr_pds_vertex_primary_program_input *input_program,
   uint32_t *code,
   struct pvr_pds_info *info,
   bool use_robust_vertex_fetch,
   const struct pvr_device_info *dev_info);
1145 
1146 /**
1147  * Generate USC address.
1148  *
1149  * \param doutu Location to write the generated address.
1150  * \param execution_address Address to generate from.
1151  */
1152 static ALWAYS_INLINE void
pvr_set_usc_execution_address64(uint64_t * doutu,uint64_t execution_address)1153 pvr_set_usc_execution_address64(uint64_t *doutu, uint64_t execution_address)
1154 {
1155    doutu[0] |= (((execution_address >>
1156                   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT)
1157                  << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT) &
1158                 ~PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK);
1159 }
1160 
1161 #endif /* PVR_PDS_H */
1162