/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define SB_RA_SCHED_CHECK DEBUG
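/* SB_RA_SCHED_CHECK tracks the build's DEBUG macro: in debug builds it
 * enables the ra_checker verification pass below; when DEBUG is not defined,
 * the identifier evaluates to 0 inside #if and the check compiles out. */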

#include "util/os_time.h"
#include "r600_pipe.h"
#include "r600_shader.h"

#include "sb_public.h"

#include <stack>
#include <map>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "sb_sched.h"

using namespace r600_sb;

static sb_hw_class translate_chip_class(enum chip_class cc);
static sb_hw_chip translate_chip(enum radeon_family rf);

sb_context *r600_sb_context_create(struct r600_context *rctx) {

	sb_context *sctx = new sb_context();

	if (sctx->init(rctx->isa, translate_chip(rctx->b.family),
			translate_chip_class(rctx->b.chip_class))) {
		delete sctx;
		sctx = NULL;
	}

	unsigned df = rctx->screen->b.debug_flags;

	sb_context::dump_pass = df & DBG_SB_DUMP;
	sb_context::dump_stat = df & DBG_SB_STAT;
	sb_context::dry_run = df & DBG_SB_DRY_RUN;
	sb_context::no_fallback = df & DBG_SB_NO_FALLBACK;
	sb_context::safe_math = df & DBG_SB_SAFEMATH;

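	/* The R600_SB_DSKIP_* values are read from the environment;
	 * debug_get_num_option() returns the given default (0 here) when the
	 * variable is unset. See the shader-skip logic in
	 * r600_sb_bytecode_process() below. */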
	sb_context::dskip_start = debug_get_num_option("R600_SB_DSKIP_START", 0);
	sb_context::dskip_end = debug_get_num_option("R600_SB_DSKIP_END", 0);
	sb_context::dskip_mode = debug_get_num_option("R600_SB_DSKIP_MODE", 0);

	return sctx;
}

void r600_sb_context_destroy(void * sctx) {
	if (sctx) {
		sb_context *ctx = static_cast<sb_context*>(sctx);

		if (sb_context::dump_stat) {
			sblog << "\ncontext src stats: ";
			ctx->src_stats.dump();
			sblog << "context opt stats: ";
			ctx->opt_stats.dump();
			sblog << "context diff: ";
			ctx->src_stats.dump_diff(ctx->opt_stats);
		}

		delete ctx;
	}
}

int r600_sb_bytecode_process(struct r600_context *rctx,
		struct r600_bytecode *bc,
		struct r600_shader *pshader,
		int dump_bytecode,
		int optimize) {
	int r = 0;
	unsigned shader_id = bc->debug_id;

	sb_context *ctx = (sb_context *)rctx->sb_context;
	if (!ctx) {
		rctx->sb_context = ctx = r600_sb_context_create(rctx);
		if (!ctx)
			return -1; /* sb_context::init() failed */
	}

	int64_t time_start = 0;
	if (sb_context::dump_stat) {
		time_start = os_time_get_nano();
	}

	SB_DUMP_STAT( sblog << "\nsb: shader " << shader_id << "\n"; );

	bc_parser parser(*ctx, bc, pshader);

	if ((r = parser.decode())) {
		assert(!"sb: bytecode decoding error");
		return r;
	}

	shader *sh = parser.get_shader();

	if (dump_bytecode) {
		bc_dump(*sh, bc->bytecode, bc->ndw).run();
	}

	if (!optimize) {
		delete sh;
		return 0;
	}

	if (sh->target != TARGET_FETCH) {
		sh->src_stats.ndw = bc->ndw;
		sh->collect_stats(false);
	}

	/* skip some shaders (use shaders from the default backend):
	 * dskip_start - range start, dskip_end - range end,
	 * e.g. start = 5, end = 6 means shaders 5 & 6
	 *
	 * dskip_mode == 0 - disabled,
	 * dskip_mode == 1 - don't process the shaders from the [start; end] range,
	 * dskip_mode == 2 - process only the shaders from the range
	 */
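	/* For example, running with R600_SB_DSKIP_START=5 R600_SB_DSKIP_END=6
	 * R600_SB_DSKIP_MODE=1 in the environment hands shaders 5 and 6 to the
	 * default backend and optimizes everything else. */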
	if (sb_context::dskip_mode) {
		if ((sb_context::dskip_start <= shader_id &&
				shader_id <= sb_context::dskip_end) ==
				(sb_context::dskip_mode == 1)) {
			sblog << "sb: skipped shader " << shader_id << " : " << "["
					<< sb_context::dskip_start << "; "
					<< sb_context::dskip_end << "] mode "
					<< sb_context::dskip_mode << "\n";
			delete sh;
			return 0;
		}
	}

	if ((r = parser.prepare())) {
		assert(!"sb: bytecode parsing error");
		delete sh;
		return r;
	}

	SB_DUMP_PASS( sblog << "\n\n###### after parse\n"; sh->dump_ir(); );

#define SB_RUN_PASS(n, dump) \
	do { \
		r = n(*sh).run(); \
		if (r) { \
			sblog << "sb: error (" << r << ") in the " << #n << " pass.\n"; \
			if (sb_context::no_fallback) \
				return r; \
			sblog << "sb: using unoptimized bytecode...\n"; \
			delete sh; \
			return 0; \
		} \
		if (dump) { \
			SB_DUMP_PASS( sblog << "\n\n###### after " << #n << "\n"; \
				sh->dump_ir(); ); \
		} \
		assert(!r); \
	} while (0)
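
	/* Example: SB_RUN_PASS(ssa_rename, 1) expands to r = ssa_rename(*sh).run()
	 * plus the error handling above - on pass failure the shader is deleted
	 * and 0 is returned so the caller keeps the unoptimized bytecode, unless
	 * sb_context::no_fallback is set. */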

	SB_RUN_PASS(ssa_prepare, 0);
	SB_RUN_PASS(ssa_rename, 1);

	if (sh->has_alu_predication)
		SB_RUN_PASS(psi_ops, 1);

	SB_RUN_PASS(liveness, 0);

	sh->dce_flags = DF_REMOVE_DEAD | DF_EXPAND;
	SB_RUN_PASS(dce_cleanup, 0);
	SB_RUN_PASS(def_use, 0);

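	/* anything still live before the root node has no defining instruction
	 * in the shader (inputs/uninitialized values) - presumably set_undef()
	 * marks such values as undef so later passes don't expect a def */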
	sh->set_undef(sh->root->live_before);

	// if conversion breaks the dependency tracking between CF_EMIT ops when
	// it removes the phi nodes for SV_GEOMETRY_EMIT, so disable it for GS/HS
	if ((sh->target != TARGET_GS && sh->target != TARGET_HS) || pshader->needs_scratch_space)
		SB_RUN_PASS(if_conversion, 1);

	// if_conversion invalidates the use info, but the next pass (peephole)
	// doesn't need it, so the def/use update can be skipped here
	// until it's really required
	//SB_RUN_PASS(def_use, 0);

	SB_RUN_PASS(peephole, 1);
	SB_RUN_PASS(def_use, 0);

	SB_RUN_PASS(gvn, 1);

	SB_RUN_PASS(def_use, 1);

	sh->dce_flags = DF_REMOVE_DEAD | DF_REMOVE_UNUSED;
	SB_RUN_PASS(dce_cleanup, 1);

	SB_RUN_PASS(ra_split, 0);
	SB_RUN_PASS(def_use, 0);

	// create 'basic blocks' - not a real CFG, just container nodes placed
	// at the correct locations for code placement
	sh->create_bbs();

	SB_RUN_PASS(gcm, 1);

	sh->compute_interferences = true;
	SB_RUN_PASS(liveness, 0);

	sh->dce_flags = DF_REMOVE_DEAD;
	SB_RUN_PASS(dce_cleanup, 1);

	SB_RUN_PASS(ra_coalesce, 1);
	SB_RUN_PASS(ra_init, 1);

	SB_RUN_PASS(post_scheduler, 1);

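	/* counterpart of create_bbs() above: dissolve the container nodes now
	 * that GCM and the post-scheduler have placed the code */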
	sh->expand_bbs();

#if SB_RA_SCHED_CHECK
	// check code correctness after regalloc/scheduler
	SB_RUN_PASS(ra_checker, 0);
#endif

	SB_RUN_PASS(bc_finalizer, 0);

	sh->optimized = true;

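	/* bc_builder appears to re-encode the optimized IR into a raw bytecode
	 * stream, retrieved below via get_bytecode() */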
	bc_builder builder(*sh);

	if ((r = builder.build())) {
		assert(0);
		delete sh;
		return r;
	}

	bytecode &nbc = builder.get_bytecode();

	if (dump_bytecode) {
		bc_dump(*sh, &nbc).run();
	}

	if (!sb_context::dry_run) {

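		/* replace the original bytecode with the optimized stream;
		 * ndw counts 32-bit dwords, so the allocation is ndw << 2 bytes */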
		free(bc->bytecode);
		bc->ndw = nbc.ndw();
		bc->bytecode = (uint32_t*) malloc(bc->ndw << 2);
		nbc.write_data(bc->bytecode);

		bc->ngpr = sh->ngpr;
		bc->nstack = sh->nstack;
	} else {
		SB_DUMP_STAT( sblog << "sb: dry run: optimized bytecode is not used\n"; );
	}

	if (sb_context::dump_stat) {
		int64_t t = os_time_get_nano() - time_start;

		sblog << "sb: processing shader " << shader_id << " done ( "
				<< ((double)t) / 1000000.0 << " ms ).\n";

		sh->opt_stats.ndw = bc->ndw;
		sh->collect_stats(true);

		sblog << "src stats: ";
		sh->src_stats.dump();
		sblog << "opt stats: ";
		sh->opt_stats.dump();
		sblog << "diff: ";
		sh->src_stats.dump_diff(sh->opt_stats);
	}

	delete sh;
	return 0;
}

static sb_hw_chip translate_chip(enum radeon_family rf) {
	switch (rf) {

#define TRANSLATE_CHIP(c) case CHIP_##c: return HW_CHIP_##c
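	/* e.g. TRANSLATE_CHIP(R600) expands to "case CHIP_R600: return HW_CHIP_R600;" */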
	TRANSLATE_CHIP(R600);
	TRANSLATE_CHIP(RV610);
	TRANSLATE_CHIP(RV630);
	TRANSLATE_CHIP(RV670);
	TRANSLATE_CHIP(RV620);
	TRANSLATE_CHIP(RV635);
	TRANSLATE_CHIP(RS780);
	TRANSLATE_CHIP(RS880);
	TRANSLATE_CHIP(RV770);
	TRANSLATE_CHIP(RV730);
	TRANSLATE_CHIP(RV710);
	TRANSLATE_CHIP(RV740);
	TRANSLATE_CHIP(CEDAR);
	TRANSLATE_CHIP(REDWOOD);
	TRANSLATE_CHIP(JUNIPER);
	TRANSLATE_CHIP(CYPRESS);
	TRANSLATE_CHIP(HEMLOCK);
	TRANSLATE_CHIP(PALM);
	TRANSLATE_CHIP(SUMO);
	TRANSLATE_CHIP(SUMO2);
	TRANSLATE_CHIP(BARTS);
	TRANSLATE_CHIP(TURKS);
	TRANSLATE_CHIP(CAICOS);
	TRANSLATE_CHIP(CAYMAN);
	TRANSLATE_CHIP(ARUBA);
#undef TRANSLATE_CHIP

	default:
		assert(!"unknown chip");
		return HW_CHIP_UNKNOWN;
	}
}

static sb_hw_class translate_chip_class(enum chip_class cc) {
	switch (cc) {
	case R600: return HW_CLASS_R600;
	case R700: return HW_CLASS_R700;
	case EVERGREEN: return HW_CLASS_EVERGREEN;
	case CAYMAN: return HW_CLASS_CAYMAN;

	default:
		assert(!"unknown chip class");
		return HW_CLASS_UNKNOWN;
	}
}