1 /*
2 * Copyright (C) 2015 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Rob Clark <robclark@freedesktop.org>
25 */
26
27 #include "util/ralloc.h"
28
29 #include "freedreno_dev_info.h"
30
31 #include "ir3_compiler.h"
32
33 static const struct debug_named_value shader_debug_options[] = {
34 /* clang-format off */
35 {"vs", IR3_DBG_SHADER_VS, "Print shader disasm for vertex shaders"},
36 {"tcs", IR3_DBG_SHADER_TCS, "Print shader disasm for tess ctrl shaders"},
37 {"tes", IR3_DBG_SHADER_TES, "Print shader disasm for tess eval shaders"},
38 {"gs", IR3_DBG_SHADER_GS, "Print shader disasm for geometry shaders"},
39 {"fs", IR3_DBG_SHADER_FS, "Print shader disasm for fragment shaders"},
40 {"cs", IR3_DBG_SHADER_CS, "Print shader disasm for compute shaders"},
41 {"disasm", IR3_DBG_DISASM, "Dump NIR and adreno shader disassembly"},
42 {"optmsgs", IR3_DBG_OPTMSGS, "Enable optimizer debug messages"},
43 {"forces2en", IR3_DBG_FORCES2EN, "Force s2en mode for tex sampler instructions"},
44 {"nouboopt", IR3_DBG_NOUBOOPT, "Disable lowering UBO to uniform"},
45 {"nofp16", IR3_DBG_NOFP16, "Don't lower mediump to fp16"},
46 {"nocache", IR3_DBG_NOCACHE, "Disable shader cache"},
47 {"spillall", IR3_DBG_SPILLALL, "Spill as much as possible to test the spiller"},
48 #ifdef DEBUG
49 /* DEBUG-only options: */
50 {"schedmsgs", IR3_DBG_SCHEDMSGS, "Enable scheduler debug messages"},
51 {"ramsgs", IR3_DBG_RAMSGS, "Enable register-allocation debug messages"},
52 #endif
53 DEBUG_NAMED_VALUE_END
54 /* clang-format on */
55 };
56
57 DEBUG_GET_ONCE_FLAGS_OPTION(ir3_shader_debug, "IR3_SHADER_DEBUG",
58 shader_debug_options, 0)
59 DEBUG_GET_ONCE_OPTION(ir3_shader_override_path, "IR3_SHADER_OVERRIDE_PATH",
60 NULL)
61
62 enum ir3_shader_debug ir3_shader_debug = 0;
63 const char *ir3_shader_override_path = NULL;
64
65 void
ir3_compiler_destroy(struct ir3_compiler * compiler)66 ir3_compiler_destroy(struct ir3_compiler *compiler)
67 {
68 disk_cache_destroy(compiler->disk_cache);
69 ralloc_free(compiler);
70 }
71
72 struct ir3_compiler *
ir3_compiler_create(struct fd_device * dev,const struct fd_dev_id * dev_id,bool robust_ubo_access)73 ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
74 bool robust_ubo_access)
75 {
76 struct ir3_compiler *compiler = rzalloc(NULL, struct ir3_compiler);
77
78 ir3_shader_debug = debug_get_option_ir3_shader_debug();
79 ir3_shader_override_path =
80 !__check_suid() ? debug_get_option_ir3_shader_override_path() : NULL;
81
82 if (ir3_shader_override_path) {
83 ir3_shader_debug |= IR3_DBG_NOCACHE;
84 }
85
86 compiler->dev = dev;
87 compiler->dev_id = dev_id;
88 compiler->gen = fd_dev_gen(dev_id);
89 compiler->robust_ubo_access = robust_ubo_access;
90
91 /* All known GPU's have 32k local memory (aka shared) */
92 compiler->local_mem_size = 32 * 1024;
93 /* TODO see if older GPU's were different here */
94 compiler->branchstack_size = 64;
95 compiler->wave_granularity = 2;
96 compiler->max_waves = 16;
97
98 if (compiler->gen >= 6) {
99 compiler->samgq_workaround = true;
100 /* a6xx split the pipeline state into geometry and fragment state, in
101 * order to let the VS run ahead of the FS. As a result there are now
102 * separate const files for the the fragment shader and everything
103 * else, and separate limits. There seems to be a shared limit, but
104 * it's higher than the vert or frag limits.
105 *
106 * TODO: The shared limit seems to be different on different on
107 * different models.
108 */
109 compiler->max_const_pipeline = 640;
110 compiler->max_const_frag = 512;
111 compiler->max_const_geom = 512;
112 compiler->max_const_safe = 128;
113
114 /* Compute shaders don't share a const file with the FS. Instead they
115 * have their own file, which is smaller than the FS one.
116 *
117 * TODO: is this true on earlier gen's?
118 */
119 compiler->max_const_compute = 256;
120
121 /* TODO: implement clip+cull distances on earlier gen's */
122 compiler->has_clip_cull = true;
123
124 /* TODO: implement private memory on earlier gen's */
125 compiler->has_pvtmem = true;
126
127 compiler->tess_use_shared =
128 fd_dev_info(compiler->dev_id)->a6xx.tess_use_shared;
129
130 compiler->storage_16bit =
131 fd_dev_info(compiler->dev_id)->a6xx.storage_16bit;
132 } else {
133 compiler->max_const_pipeline = 512;
134 compiler->max_const_geom = 512;
135 compiler->max_const_frag = 512;
136 compiler->max_const_compute = 512;
137
138 /* Note: this will have to change if/when we support tess+GS on
139 * earlier gen's.
140 */
141 compiler->max_const_safe = 256;
142 }
143
144 if (compiler->gen >= 6) {
145 compiler->reg_size_vec4 =
146 fd_dev_info(compiler->dev_id)->a6xx.reg_size_vec4;
147 } else if (compiler->gen >= 4) {
148 /* On a4xx-a5xx, using r24.x and above requires using the smallest
149 * threadsize.
150 */
151 compiler->reg_size_vec4 = 48;
152 } else {
153 /* TODO: confirm this */
154 compiler->reg_size_vec4 = 96;
155 }
156
157 if (compiler->gen >= 6) {
158 compiler->threadsize_base = 64;
159 } else if (compiler->gen >= 4) {
160 /* TODO: Confirm this for a4xx. For a5xx this is based on the Vulkan
161 * 1.1 subgroupSize which is 32.
162 */
163 compiler->threadsize_base = 32;
164 } else {
165 compiler->threadsize_base = 8;
166 }
167
168 if (compiler->gen >= 4) {
169 /* need special handling for "flat" */
170 compiler->flat_bypass = true;
171 compiler->levels_add_one = false;
172 compiler->unminify_coords = false;
173 compiler->txf_ms_with_isaml = false;
174 compiler->array_index_add_half = true;
175 compiler->instr_align = 16;
176 compiler->const_upload_unit = 4;
177 } else {
178 /* no special handling for "flat" */
179 compiler->flat_bypass = false;
180 compiler->levels_add_one = true;
181 compiler->unminify_coords = true;
182 compiler->txf_ms_with_isaml = true;
183 compiler->array_index_add_half = false;
184 compiler->instr_align = 4;
185 compiler->const_upload_unit = 8;
186 }
187
188 ir3_disk_cache_init(compiler);
189
190 return compiler;
191 }
192