/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 */
10 #ifndef ACO_SHADER_INFO_H
11 #define ACO_SHADER_INFO_H
12 
13 #include "ac_hw_stage.h"
14 #include "ac_shader_args.h"
15 #include "amd_family.h"
16 #include "shader_enums.h"
17 
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21 
/* Upper bounds used to size arrays and masks.
 * NOTE(review): "SO" presumably means stream output and "VBS" vertex buffers;
 * confirm against the users of these macros.
 */
#define ACO_MAX_SO_OUTPUTS     128
#define ACO_MAX_SO_BUFFERS     4
#define ACO_MAX_VERTEX_ATTRIBS 32
#define ACO_MAX_VBS            32
26 
27 struct aco_vs_prolog_info {
28    struct ac_arg inputs;
29 
30    uint32_t instance_rate_inputs;
31    uint32_t nontrivial_divisors;
32    uint32_t zero_divisors;
33    uint32_t post_shuffle;
34    /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
35     * using bitwise arithmetic.
36     */
37    uint32_t alpha_adjust_lo;
38    uint32_t alpha_adjust_hi;
39 
40    uint8_t formats[ACO_MAX_VERTEX_ATTRIBS];
41 
42    unsigned num_attributes;
43    uint32_t misaligned_mask;
44    uint32_t unaligned_mask;
45    bool is_ngg;
46    gl_shader_stage next_stage;
47 };
48 
49 struct aco_ps_epilog_info {
50    struct ac_arg colors[MAX_DRAW_BUFFERS];
51    uint8_t color_map[MAX_DRAW_BUFFERS];
52 
53    uint32_t spi_shader_col_format;
54 
55    /* Bitmasks, each bit represents one of the 8 MRTs. */
56    uint8_t color_is_int8;
57    uint8_t color_is_int10;
58 
59    bool mrt0_is_dual_src;
60 
61    bool alpha_to_coverage_via_mrtz;
62    bool alpha_to_one;
63 
64    /* OpenGL only */
65    uint16_t color_types;
66    bool clamp_color;
67    bool skip_null_export;
68    unsigned broadcast_last_cbuf;
69    enum compare_func alpha_func;
70    /* Depth/stencil/samplemask are always passed via VGPRs, and the epilog key can choose
71     * not to export them using these flags, which can be dynamic states.
72     */
73    bool kill_depth;
74    bool kill_stencil;
75    bool kill_samplemask;
76 
77    struct ac_arg alpha_reference;
78    struct ac_arg depth;
79    struct ac_arg stencil;
80    struct ac_arg samplemask;
81 };
82 
83 struct aco_ps_prolog_info {
84    bool poly_stipple;
85    unsigned poly_stipple_buf_offset;
86 
87    bool bc_optimize_for_persp;
88    bool bc_optimize_for_linear;
89    bool force_persp_sample_interp;
90    bool force_linear_sample_interp;
91    bool force_persp_center_interp;
92    bool force_linear_center_interp;
93 
94    unsigned samplemask_log_ps_iter;
95    unsigned num_interp_inputs;
96    unsigned colors_read;
97    int color_interp_vgpr_index[2];
98    int color_attr_index[2];
99    bool color_two_side;
100    bool needs_wqm;
101 
102    struct ac_arg internal_bindings;
103 };
104 
105 struct aco_shader_info {
106    enum ac_hw_stage hw_stage;
107    uint8_t wave_size;
108    bool has_ngg_culling;
109    bool has_ngg_early_prim_export;
110    bool image_2d_view_of_3d;
111    unsigned workgroup_size;
112    bool merged_shader_compiled_separately; /* GFX9+ */
113    struct ac_arg next_stage_pc;
114    struct ac_arg epilog_pc; /* Vulkan only */
115    struct {
116       bool tcs_in_out_eq;
117       bool any_tcs_inputs_via_lds;
118       bool has_prolog;
119    } vs;
120    struct {
121       struct ac_arg tcs_offchip_layout;
122 
123       /* Vulkan only */
124       uint32_t num_lds_blocks;
125    } tcs;
126    struct {
127       uint32_t num_inputs;
128       unsigned spi_ps_input_ena;
129       unsigned spi_ps_input_addr;
130       bool has_prolog;
131       bool has_epilog;
132 
133       /* OpenGL only */
134       struct ac_arg alpha_reference;
135    } ps;
136    struct {
137       bool uses_full_subgroups;
138    } cs;
139 
140    uint32_t gfx9_gs_ring_lds_size;
141 };
142 
/* Severity passed to the debug callback in struct aco_compiler_options.
 * Only one level (error) is defined.
 */
enum aco_compiler_debug_level {
   ACO_COMPILER_DEBUG_LEVEL_ERROR,
};
146 
147 struct aco_compiler_options {
148    bool dump_ir;
149    bool dump_preoptir;
150    bool record_asm;
151    bool record_ir;
152    bool record_stats;
153    bool has_ls_vgpr_init_bug;
154    bool load_grid_size_from_user_sgpr;
155    bool optimisations_disabled;
156    uint8_t enable_mrt_output_nan_fixup;
157    bool wgp_mode;
158    bool is_opengl;
159    enum radeon_family family;
160    enum amd_gfx_level gfx_level;
161    uint32_t address32_hi;
162    struct {
163       void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
164       void* private_data;
165    } debug;
166 };
167 
/* Identifiers for the statistics the compiler can report.
 *
 * Values are consecutive starting at 0, so aco_num_statistics (which must stay
 * last) equals the number of statistics and can be used as an array size.
 */
enum aco_statistic {
   aco_statistic_hash,
   aco_statistic_instructions,
   aco_statistic_copies,
   aco_statistic_branches,
   aco_statistic_latency,
   aco_statistic_inv_throughput,
   aco_statistic_vmem_clauses,
   aco_statistic_smem_clauses,
   aco_statistic_sgpr_presched,
   aco_statistic_vgpr_presched,
   aco_statistic_valu,
   aco_statistic_salu,
   aco_statistic_vmem,
   aco_statistic_smem,
   aco_statistic_vopd,
   aco_num_statistics
};
186 
/* Identifiers for symbols referenced by struct aco_symbol.
 * aco_symbol_invalid is 0, so a zero-initialized id is the invalid one.
 */
enum aco_symbol_id {
   aco_symbol_invalid,
   aco_symbol_scratch_addr_lo,
   aco_symbol_scratch_addr_hi,
   aco_symbol_lds_ngg_scratch_base,
   aco_symbol_lds_ngg_gs_out_vertex_base,
   aco_symbol_const_data_addr,
};
195 
196 struct aco_symbol {
197    enum aco_symbol_id id;
198    unsigned offset;
199 };
200 
/* Sizes used by struct aco_trap_handler_layout below. */
#define MAX_SGPRS       108
#define MAX_VGPRS       256
#define MAX_LDS_SIZE    65536 /* 64 KiB */
#define NUM_SAVED_VGPRS 2

/* Layout of saved wave state for the trap handler.
 *
 * Every member is a uint32_t or an array of uint32_t, so member order and the
 * macros above fully determine the offsets; do not reorder or resize members
 * without updating whatever reads or writes this layout.
 *
 * NOTE(review): the "* 64" factors are presumably the lane count per register;
 * confirm against the trap handler code.
 */
struct aco_trap_handler_layout {
   uint32_t saved_vgprs[NUM_SAVED_VGPRS * 64];

   uint32_t ttmp0;
   uint32_t ttmp1;

   struct {
      uint32_t status;
      uint32_t mode;
      uint32_t trap_sts;
      uint32_t hw_id1;
      uint32_t gpr_alloc;
      uint32_t lds_alloc;
      uint32_t ib_sts;
   } sq_wave_regs;

   uint32_t m0;
   uint32_t exec_lo;
   uint32_t exec_hi;
   uint32_t sgprs[MAX_SGPRS];
   uint32_t vgprs[MAX_VGPRS * 64];
   uint32_t lds[MAX_LDS_SIZE / 4]; /* MAX_LDS_SIZE is in bytes, 4 bytes per entry */
};
229 
230 #ifdef __cplusplus
231 }
232 #endif
233 #endif
234