1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef SB_SHADER_H_ 28 #define SB_SHADER_H_ 29 30 #include <list> 31 #include <string> 32 #include <map> 33 34 #include "sb_ir.h" 35 #include "sb_expr.h" 36 37 namespace r600_sb { 38 39 struct shader_input { 40 unsigned comp_mask; 41 unsigned preloaded; 42 }; 43 44 struct error_info { 45 node *n; 46 unsigned arg_index; 47 std::string message; 48 }; 49 50 typedef std::multimap<node*, error_info> error_map; 51 52 class sb_context; 53 54 typedef std::vector<shader_input> inputs_vec; 55 typedef std::vector<gpr_array*> gpr_array_vec; 56 57 struct ra_edge { 58 value *a, *b; 59 unsigned cost; 60 ra_edgera_edge61 ra_edge(value *a, value *b, unsigned cost) : a(a), b(b), cost(cost) {} 62 }; 63 64 enum chunk_flags { 65 RCF_GLOBAL = (1 << 0), 66 RCF_PIN_CHAN = (1 << 1), 67 RCF_PIN_REG = (1 << 2), 68 69 RCF_FIXED = (1 << 3), 70 71 RCF_PREALLOC = (1 << 4) 72 }; 73 74 enum dce_flags { 75 DF_REMOVE_DEAD = (1 << 0), 76 DF_REMOVE_UNUSED = (1 << 1), 77 DF_EXPAND = (1 << 2), 78 }; 79 80 inline dce_flags operator |(dce_flags l, dce_flags r) { 81 return (dce_flags)((unsigned)l|(unsigned)r); 82 } 83 84 inline chunk_flags operator |(chunk_flags l, chunk_flags r) { 85 return (chunk_flags)((unsigned)l|(unsigned)r); 86 } 87 inline chunk_flags& operator |=(chunk_flags &l, chunk_flags r) { 88 l = l | r; 89 return l; 90 } 91 92 inline chunk_flags& operator &=(chunk_flags &l, chunk_flags r) { 93 l = (chunk_flags)((unsigned)l & (unsigned)r); 94 return l; 95 } 96 97 inline chunk_flags operator ~(chunk_flags r) { 98 return (chunk_flags)~(unsigned)r; 99 } 100 101 struct ra_chunk { 102 vvec values; 103 chunk_flags flags; 104 unsigned cost; 105 sel_chan pin; 106 ra_chunkra_chunk107 ra_chunk() : values(), flags(), cost(), pin() {} 108 is_fixedra_chunk109 bool is_fixed() { return flags & RCF_FIXED; } fixra_chunk110 void fix() { flags |= RCF_FIXED; } 111 is_globalra_chunk112 bool is_global() { return flags & RCF_GLOBAL; } set_globalra_chunk113 void set_global() { flags |= RCF_GLOBAL; } 114 is_reg_pinnedra_chunk115 bool is_reg_pinned() { return flags & RCF_PIN_REG; } is_chan_pinnedra_chunk116 bool is_chan_pinned() { return flags & RCF_PIN_CHAN; } 117 is_preallocra_chunk118 bool is_prealloc() { return flags & RCF_PREALLOC; } set_preallocra_chunk119 void set_prealloc() { flags |= RCF_PREALLOC; } 120 }; 121 122 typedef std::vector<ra_chunk*> chunk_vector; 123 124 class ra_constraint { 125 public: ra_constraint(constraint_kind kind)126 ra_constraint(constraint_kind kind) : kind(kind), cost(0) {} 127 128 constraint_kind kind; 129 vvec values; 130 unsigned cost; 131 132 void update_values(); 133 bool check(); 134 }; 135 136 typedef std::vector<ra_constraint*> constraint_vec; 137 typedef std::vector<ra_chunk*> chunk_vec; 138 139 // priority queue 140 // FIXME use something more suitale or custom class ? 141 142 template <class T> 143 struct cost_compare { operatorcost_compare144 bool operator ()(const T& t1, const T& t2) { 145 return t1->cost > t2->cost; 146 } 147 }; 148 149 template <class T, class Comp> 150 class queue { 151 typedef std::vector<T> container; 152 container cont; 153 154 public: queue()155 queue() : cont() {} 156 157 typedef typename container::iterator iterator; 158 begin()159 iterator begin() { return cont.begin(); } end()160 iterator end() { return cont.end(); } 161 insert(const T & t)162 iterator insert(const T& t) { 163 iterator I = std::upper_bound(begin(), end(), t, Comp()); 164 if (I == end()) 165 cont.push_back(t); 166 else 167 cont.insert(I, t); 168 169 return I; 170 } 171 erase(const T & t)172 void erase(const T& t) { 173 std::pair<iterator, iterator> R = 174 std::equal_range(begin(), end(), t, Comp()); 175 iterator F = std::find(R.first, R.second, t); 176 if (F != R.second) 177 cont.erase(F); 178 } 179 }; 180 181 typedef queue<ra_chunk*, cost_compare<ra_chunk*> > chunk_queue; 182 typedef queue<ra_edge*, cost_compare<ra_edge*> > edge_queue; 183 typedef queue<ra_constraint*, cost_compare<ra_constraint*> > constraint_queue; 184 185 typedef std::set<ra_chunk*> chunk_set; 186 187 class shader; 188 189 class coalescer { 190 191 shader &sh; 192 193 edge_queue edges; 194 chunk_queue chunks; 195 constraint_queue constraints; 196 197 constraint_vec all_constraints; 198 chunk_vec all_chunks; 199 200 public: 201 coalescer(shader & sh)202 coalescer(shader &sh) : sh(sh), edges(), chunks(), constraints() {} 203 ~coalescer(); 204 205 int run(); 206 207 void add_edge(value *a, value *b, unsigned cost); 208 void build_chunks(); 209 void build_constraint_queue(); 210 void build_chunk_queue(); 211 int color_constraints(); 212 int color_chunks(); 213 214 ra_constraint* create_constraint(constraint_kind kind); 215 216 enum ac_cost { 217 phi_cost = 10000, 218 copy_cost = 1, 219 }; 220 221 void dump_edges(); 222 void dump_chunks(); 223 void dump_constraint_queue(); 224 225 static void dump_chunk(ra_chunk *c); 226 static void dump_constraint(ra_constraint* c); 227 228 void get_chunk_interferences(ra_chunk *c, val_set &s); 229 230 private: 231 232 void create_chunk(value *v); 233 void unify_chunks(ra_edge *e); 234 bool chunks_interference(ra_chunk *c1, ra_chunk *c2); 235 236 int color_reg_constraint(ra_constraint *c); 237 void color_phi_constraint(ra_constraint *c); 238 239 240 void init_reg_bitset(sb_bitset &bs, val_set &vs); 241 242 void color_chunk(ra_chunk *c, sel_chan color); 243 244 ra_chunk* detach_value(value *v); 245 }; 246 247 248 249 class shader { 250 251 sb_context &ctx; 252 253 typedef sb_map<uint32_t, value*> value_map; 254 value_map reg_values; 255 256 // read-only values 257 value_map const_values; // immediate constants key -const value (uint32_t) 258 value_map special_ro_values; // key - hw alu_sel & chan 259 value_map kcache_values; 260 261 gpr_array_vec gpr_arrays; 262 263 unsigned next_temp_value_index; 264 265 unsigned prep_regs_count; 266 267 value* pred_sels[2]; 268 269 regions_vec regions; 270 inputs_vec inputs; 271 272 value *undef; 273 274 sb_value_pool val_pool; 275 sb_pool pool; 276 277 std::vector<node*> all_nodes; 278 279 public: 280 shader_stats src_stats, opt_stats; 281 282 error_map errors; 283 284 bool optimized; 285 286 unsigned id; 287 288 coalescer coal; 289 290 static const unsigned temp_regid_offset = 512; 291 292 bbs_vec bbs; 293 294 const shader_target target; 295 296 value_table vt; 297 expr_handler ex; 298 299 container_node *root; 300 301 bool compute_interferences; 302 303 bool has_alu_predication; 304 bool uses_gradients; 305 306 bool safe_math; 307 308 unsigned ngpr, nstack; 309 310 unsigned dce_flags; 311 312 shader(sb_context &sctx, shader_target t, unsigned id); 313 314 ~shader(); 315 get_ctx()316 sb_context &get_ctx() const { return ctx; } 317 318 value* get_const_value(const literal & v); 319 value* get_special_value(unsigned sv_id, unsigned version = 0); 320 value* create_temp_value(); 321 value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel, 322 unsigned version = 0); 323 324 325 value* get_special_ro_value(unsigned sel); 326 value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode); 327 328 value* get_value_version(value* v, unsigned ver); 329 330 void init(); 331 void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src); 332 333 void dump_ir(); 334 335 void add_gpr_array(unsigned gpr_start, unsigned gpr_count, 336 unsigned comp_mask); 337 338 value* get_pred_sel(int sel); 339 bool assign_slot(alu_node *n, alu_node *slots[5]); 340 341 gpr_array* get_gpr_array(unsigned reg, unsigned chan); 342 343 void add_input(unsigned gpr, bool preloaded = false, 344 unsigned comp_mask = 0xF); 345 get_inputs()346 const inputs_vec & get_inputs() {return inputs; } 347 get_regions()348 regions_vec & get_regions() { return regions; } 349 350 void init_call_fs(cf_node *cf); 351 352 value *get_undef_value(); 353 void set_undef(val_set &s); 354 355 node* create_node(node_type nt, node_subtype nst, 356 node_flags flags = NF_EMPTY); 357 alu_node* create_alu(); 358 alu_group_node* create_alu_group(); 359 alu_packed_node* create_alu_packed(); 360 cf_node* create_cf(); 361 cf_node* create_cf(unsigned op); 362 fetch_node* create_fetch(); 363 region_node* create_region(); 364 depart_node* create_depart(region_node *target); 365 repeat_node* create_repeat(region_node *target); 366 container_node* create_container(node_type nt = NT_LIST, 367 node_subtype nst = NST_LIST, 368 node_flags flags = NF_EMPTY); 369 if_node* create_if(); 370 bb_node* create_bb(unsigned id, unsigned loop_level); 371 get_value_by_uid(unsigned id)372 value* get_value_by_uid(unsigned id) { return val_pool[id - 1]; } 373 374 cf_node* create_clause(node_subtype nst); 375 376 void create_bbs(); 377 void expand_bbs(); 378 379 alu_node* create_mov(value* dst, value* src); 380 alu_node* create_copy_mov(value *dst, value *src, unsigned affcost = 1); 381 382 const char * get_shader_target_name(); 383 384 std::string get_full_target_name(); 385 386 void create_bbs(container_node* n, bbs_vec &bbs, int loop_level = 0); 387 void expand_bbs(bbs_vec &bbs); 388 389 sched_queue_id get_queue_id(node* n); 390 391 void simplify_dep_rep(node *dr); 392 393 unsigned first_temp_gpr(); 394 unsigned num_nontemp_gpr(); 395 arrays()396 gpr_array_vec& arrays() { return gpr_arrays; } 397 398 void set_uses_kill(); 399 400 void fill_array_values(gpr_array *a, vvec &vv); 401 402 alu_node* clone(alu_node *n); 403 get_value_pool()404 sb_value_pool& get_value_pool() { return val_pool; } 405 406 void collect_stats(bool opt); 407 408 private: 409 value* create_value(value_kind k, sel_chan regid, unsigned ver); 410 value* get_value(value_kind kind, sel_chan id, 411 unsigned version = 0); 412 value* get_ro_value(value_map &vm, value_kind vk, unsigned key); 413 }; 414 415 } 416 417 #endif /* SHADER_H_ */ 418