• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 #ifndef ACO_SHADER_INFO_H
11 #define ACO_SHADER_INFO_H
12 
13 #include "ac_hw_stage.h"
14 #include "ac_shader_args.h"
15 #include "amd_family.h"
16 #include "shader_enums.h"
17 
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21 
/* Compile-time capacity limits.
 *
 * Within this header only ACO_MAX_VERTEX_ATTRIBS is used (it sizes
 * aco_vs_prolog_info::formats); the remaining limits are not referenced here
 * and are presumably consumed by files that include this header.
 */
#define ACO_MAX_SO_OUTPUTS     128
#define ACO_MAX_SO_BUFFERS     4
#define ACO_MAX_VERTEX_ATTRIBS 32
#define ACO_MAX_VBS            32
26 
/* Description of a vertex shader prolog (going by the struct name).
 *
 * NOTE(review): the uint32_t mask fields below look like per-attribute bitmasks
 * (ACO_MAX_VERTEX_ATTRIBS is 32, matching their width) -- confirm against the
 * code that fills this struct.
 */
struct aco_vs_prolog_info {
   struct ac_arg inputs;

   uint32_t instance_rate_inputs;
   uint32_t nontrivial_divisors;
   uint32_t zero_divisors;
   uint32_t post_shuffle;
   /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes
    * using bitwise arithmetic.
    */
   uint32_t alpha_adjust_lo;
   uint32_t alpha_adjust_hi;

   /* One byte per attribute slot; the array has ACO_MAX_VERTEX_ATTRIBS entries. */
   uint8_t formats[ACO_MAX_VERTEX_ATTRIBS];

   unsigned num_attributes;
   uint32_t misaligned_mask;
   uint32_t unaligned_mask;
   bool is_ngg;
   /* Stage enum value; presumably the stage that runs after this VS -- TODO confirm. */
   gl_shader_stage next_stage;
};
48 
/* Description of a pixel shader epilog (going by the struct name).
 *
 * Holds one argument descriptor and one color_map byte per draw buffer
 * (MAX_DRAW_BUFFERS entries each), plus flags and argument descriptors for
 * the alpha reference, depth, stencil and sample mask.
 */
struct aco_ps_epilog_info {
   struct ac_arg colors[MAX_DRAW_BUFFERS];
   uint8_t color_map[MAX_DRAW_BUFFERS];

   uint32_t spi_shader_col_format;

   /* Bitmasks, each bit represents one of the 8 MRTs. */
   uint8_t color_is_int8;
   uint8_t color_is_int10;

   bool mrt0_is_dual_src;

   bool alpha_to_coverage_via_mrtz;
   bool alpha_to_one;

   /* OpenGL only */
   uint16_t color_types;
   bool clamp_color;
   bool skip_null_export;
   bool writes_all_cbufs;
   enum compare_func alpha_func;
   /* Depth/stencil/samplemask are always passed via VGPRs, and the epilog key can choose
    * not to export them using these flags, which can be dynamic states.
    */
   bool kill_depth;
   bool kill_stencil;
   bool kill_samplemask;

   /* Argument descriptors for the values named by each field. */
   struct ac_arg alpha_reference;
   struct ac_arg depth;
   struct ac_arg stencil;
   struct ac_arg samplemask;
};
82 
/* Description of a pixel shader prolog (going by the struct name).
 *
 * Mostly boolean switches plus a few counts and indices; their exact meaning is
 * defined by the code that consumes this struct, which is not part of this header.
 */
struct aco_ps_prolog_info {
   bool poly_stipple;
   unsigned poly_stipple_buf_offset;

   /* NOTE(review): the bc_optimize_ and force_ flags presumably select interpolation-mode overrides
    * -- confirm with the prolog builder.
    */
   bool bc_optimize_for_persp;
   bool bc_optimize_for_linear;
   bool force_persp_sample_interp;
   bool force_linear_sample_interp;
   bool force_persp_center_interp;
   bool force_linear_center_interp;

   unsigned samplemask_log_ps_iter;
   bool get_frag_coord_from_pixel_coord;
   bool pixel_center_integer;
   bool force_samplemask_to_helper_invocation;
   unsigned num_interp_inputs;
   unsigned colors_read;
   /* Two entries each; signed, so negative values are representable
    * (presumably used as a "none" marker -- TODO confirm).
    */
   int color_interp_vgpr_index[2];
   int color_attr_index[2];
   bool color_two_side;
   bool needs_wqm;

   struct ac_arg internal_bindings;
};
107 
/* Per-shader information record.
 *
 * Contains stage-independent fields followed by one nested struct per stage
 * (vs, tcs, ps, cs) holding the fields specific to that stage.
 */
struct aco_shader_info {
   enum ac_hw_stage hw_stage;
   uint8_t wave_size;
   bool schedule_ngg_pos_exports; /* Whether we should schedule position exports up or not. */
   bool image_2d_view_of_3d;
   unsigned workgroup_size;
   bool merged_shader_compiled_separately; /* GFX9+ */
   struct ac_arg next_stage_pc;
   struct ac_arg epilog_pc; /* Vulkan only */
   /* Fields under the "vs" member. */
   struct {
      bool tcs_in_out_eq;
      bool any_tcs_inputs_via_lds;
      bool has_prolog;
   } vs;
   /* Fields under the "tcs" member. */
   struct {
      struct ac_arg tcs_offchip_layout;

      /* Vulkan only */
      uint32_t num_lds_blocks;
   } tcs;
   /* Fields under the "ps" member. */
   struct {
      uint32_t num_inputs;
      unsigned spi_ps_input_ena;
      unsigned spi_ps_input_addr;
      bool has_prolog;
      bool has_epilog;

      /* OpenGL only */
      struct ac_arg alpha_reference;
   } ps;
   /* Fields under the "cs" member. */
   struct {
      bool uses_full_subgroups;
   } cs;

   uint32_t gfx9_gs_ring_lds_size;
};
144 
/* Severity passed to the debug callback declared in struct aco_compiler_options.
 * Only a single level (error) is defined.
 */
enum aco_compiler_debug_level {
   ACO_COMPILER_DEBUG_LEVEL_ERROR,
};
148 
/* Compiler options: debugging/recording switches, hardware-workaround and mode
 * flags, the target chip identification, and an optional debug callback.
 */
struct aco_compiler_options {
   bool dump_ir;
   bool dump_preoptir;
   bool record_asm;
   bool record_ir;
   bool record_stats;
   bool has_ls_vgpr_init_bug;
   bool load_grid_size_from_user_sgpr;
   bool optimisations_disabled;
   /* Note: uint8_t rather than bool, so it can carry more than an on/off value
    * (presumably a per-MRT mask -- TODO confirm).
    */
   uint8_t enable_mrt_output_nan_fixup;
   bool wgp_mode;
   bool is_opengl;
   enum radeon_family family;
   enum amd_gfx_level gfx_level;
   uint32_t address32_hi;
   /* Debug message callback. func takes an opaque pointer, a severity level and a
    * message string; private_data is presumably the value handed back as func's
    * first argument -- verify against the call sites.
    */
   struct {
      void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
      void* private_data;
   } debug;
};
169 
/* Identifiers for the individual compiler statistics.
 *
 * Enumerators take consecutive values starting at 0. aco_num_statistics is not a
 * statistic itself: as the last enumerator its value equals the number of entries
 * before it, so new statistics must be inserted above it.
 */
enum aco_statistic {
   aco_statistic_hash,
   aco_statistic_instructions,
   aco_statistic_copies,
   aco_statistic_branches,
   aco_statistic_latency,
   aco_statistic_inv_throughput,
   aco_statistic_vmem_clauses,
   aco_statistic_smem_clauses,
   aco_statistic_sgpr_presched,
   aco_statistic_vgpr_presched,
   aco_statistic_valu,
   aco_statistic_salu,
   aco_statistic_vmem,
   aco_statistic_smem,
   aco_statistic_vopd,
   aco_num_statistics
};
188 
/* Kinds of symbol that a struct aco_symbol can refer to.
 *
 * aco_symbol_invalid is the first enumerator and therefore has the value 0, so a
 * zero-initialized struct aco_symbol carries the invalid id.
 */
enum aco_symbol_id {
   aco_symbol_invalid,
   aco_symbol_scratch_addr_lo,
   aco_symbol_scratch_addr_hi,
   aco_symbol_lds_ngg_scratch_base,
   aco_symbol_lds_ngg_gs_out_vertex_base,
   aco_symbol_const_data_addr,
};
197 
/* A symbol reference: which symbol (id) and where it applies (offset).
 *
 * NOTE(review): the unit and base of offset are not stated in this header;
 * presumably a position within the emitted shader binary -- confirm with the producer.
 */
struct aco_symbol {
   enum aco_symbol_id id;
   unsigned offset;
};
202 
/* Sizing constants for the arrays in struct aco_trap_handler_layout:
 * register counts for the SGPR/VGPR dumps, the LDS dump size in bytes, and the
 * number of VGPRs stored in the saved_vgprs area.
 */
#define MAX_SGPRS 108
#define MAX_VGPRS       256
#define MAX_LDS_SIZE    65536 /* 64 KiB */
#define NUM_SAVED_VGPRS 2
207 
/* Layout of the state recorded for a trap handler (going by the struct name).
 *
 * Every member is a uint32_t or an array of uint32_t, so the struct is a flat
 * sequence of 32-bit words.
 *
 * NOTE(review): the field order presumably has to match what the trap handler
 * code writes, so do not reorder or insert fields without checking the writer.
 */
struct aco_trap_handler_layout {
   /* NUM_SAVED_VGPRS registers, 64 words each (presumably one word per lane -- TODO confirm). */
   uint32_t saved_vgprs[NUM_SAVED_VGPRS * 64];

   uint32_t ttmp0;
   uint32_t ttmp1;

   /* One 32-bit word per register named below. */
   struct {
      uint32_t status;
      uint32_t mode;
      uint32_t trap_sts;
      uint32_t hw_id1;
      uint32_t gpr_alloc;
      uint32_t lds_alloc;
      uint32_t ib_sts;
   } sq_wave_regs;

   uint32_t m0;
   uint32_t exec_lo;
   uint32_t exec_hi;
   /* One word per SGPR. */
   uint32_t sgprs[MAX_SGPRS];
   /* MAX_VGPRS registers, 64 words each. */
   uint32_t vgprs[MAX_VGPRS * 64];
   /* MAX_LDS_SIZE bytes stored as 32-bit words, hence the division by 4. */
   uint32_t lds[MAX_LDS_SIZE / 4];
};
231 
232 #ifdef __cplusplus
233 }
234 #endif
235 #endif
236