1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #ifndef SB_SCHED_H_ 28 #define SB_SCHED_H_ 29 30 namespace r600_sb { 31 32 typedef sb_map<node*, unsigned> uc_map; 33 34 // resource trackers for scheduler 35 // rp = read port 36 // uc = use count 37 38 typedef sb_set<unsigned> kc_lines; 39 40 class rp_kcache_tracker { 41 unsigned rp[4]; 42 unsigned uc[4]; 43 const unsigned sel_count; 44 kc_sel(sel_chan r)45 unsigned kc_sel(sel_chan r) { 46 return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1; 47 } 48 49 public: 50 rp_kcache_tracker(shader &sh); 51 52 bool try_reserve(node *n); 53 void unreserve(node *n); 54 55 56 bool try_reserve(sel_chan r); 57 void unreserve(sel_chan r); 58 59 void reset(); 60 num_sels()61 unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; } 62 63 unsigned get_lines(kc_lines &lines); 64 }; 65 66 class literal_tracker { 67 literal lt[4]; 68 unsigned uc[4]; 69 70 public: literal_tracker()71 literal_tracker() : lt(), uc() {} 72 73 bool try_reserve(alu_node *n); 74 void unreserve(alu_node *n); 75 76 bool try_reserve(literal l); 77 void unreserve(literal l); 78 79 void reset(); 80 count()81 unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; } 82 83 void init_group_literals(alu_group_node *g); 84 85 }; 86 87 class rp_gpr_tracker { 88 // rp[cycle][elem] 89 unsigned rp[3][4]; 90 unsigned uc[3][4]; 91 92 public: rp_gpr_tracker()93 rp_gpr_tracker() : rp(), uc() {} 94 95 bool try_reserve(alu_node *n); 96 void unreserve(alu_node *n); 97 98 bool try_reserve(unsigned cycle, unsigned sel, unsigned chan); 99 void unreserve(unsigned cycle, unsigned sel, unsigned chan); 100 101 void reset(); 102 103 void dump(); 104 }; 105 106 class alu_group_tracker { 107 108 shader &sh; 109 110 rp_kcache_tracker kc; 111 rp_gpr_tracker gpr; 112 literal_tracker lt; 113 114 alu_node * slots[5]; 115 116 unsigned available_slots; 117 118 unsigned max_slots; 119 120 typedef std::map<value*, unsigned> value_index_map; 121 122 value_index_map vmap; 123 124 bool has_mova; 125 bool uses_ar; 126 bool has_predset; 127 bool has_kill; 128 bool updates_exec_mask; 129 130 bool consumes_lds_oqa; 131 bool produces_lds_oqa; 132 unsigned chan_count[4]; 133 134 // param index + 1 (0 means that group doesn't refer to Params) 135 // we can't use more than one param index in a group 136 unsigned interp_param; 137 138 unsigned next_id; 139 140 node_vec packed_ops; 141 142 void assign_slot(unsigned slot, alu_node *n); 143 144 public: 145 alu_group_tracker(shader &sh); 146 147 // FIXME use fast bs correctness check (values for same chan <= 3) ?? 148 bool try_reserve(alu_node *n); 149 bool try_reserve(alu_packed_node *p); 150 151 void reinit(); 152 void reset(bool keep_packed = false); 153 154 sel_chan get_value_id(value *v); 155 void update_flags(alu_node *n); 156 slot(unsigned i)157 alu_node* slot(unsigned i) { return slots[i]; } 158 used_slots()159 unsigned used_slots() { 160 return (~available_slots) & ((1 << max_slots) - 1); 161 } 162 inst_count()163 unsigned inst_count() { 164 return __builtin_popcount(used_slots()); 165 } 166 literal_count()167 unsigned literal_count() { return lt.count(); } literal_slot_count()168 unsigned literal_slot_count() { return (literal_count() + 1) >> 1; }; slot_count()169 unsigned slot_count() { return inst_count() + literal_slot_count(); } 170 get_consumes_lds_oqa()171 bool get_consumes_lds_oqa() { return consumes_lds_oqa; } get_produces_lds_oqa()172 bool get_produces_lds_oqa() { return produces_lds_oqa; } 173 alu_group_node* emit(); 174 kcache()175 rp_kcache_tracker& kcache() { return kc; } 176 has_update_exec_mask()177 bool has_update_exec_mask() { return updates_exec_mask; } avail_slots()178 unsigned avail_slots() { return available_slots; } 179 180 void discard_all_slots(container_node &removed_nodes); 181 void discard_slots(unsigned slot_mask, container_node &removed_nodes); 182 has_ar_load()183 bool has_ar_load() { return has_mova; } 184 }; 185 186 class alu_kcache_tracker { 187 bc_kcache kc[4]; 188 sb_set<unsigned> lines; 189 unsigned max_kcs; 190 191 public: 192 alu_kcache_tracker(sb_hw_class hc)193 alu_kcache_tracker(sb_hw_class hc) 194 : kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {} 195 196 void reset(); 197 bool try_reserve(alu_group_tracker >); 198 bool update_kc(); init_clause(bc_cf & bc)199 void init_clause(bc_cf &bc) { 200 memcpy(bc.kc, kc, sizeof(kc)); 201 } 202 }; 203 204 class alu_clause_tracker { 205 shader &sh; 206 207 alu_kcache_tracker kt; 208 unsigned slot_count; 209 210 alu_group_tracker grp0; 211 alu_group_tracker grp1; 212 213 unsigned group; 214 215 cf_node *clause; 216 217 bool push_exec_mask; 218 219 unsigned outstanding_lds_oqa_reads; 220 public: 221 container_node conflict_nodes; 222 223 // current values of AR and PR registers that we have to preload 224 // till the end of clause (in fact, beginning, because we're scheduling 225 // bottom-up) 226 value *current_ar; 227 value *current_pr; 228 // current values of CF_IDX registers that need preloading 229 value *current_idx[2]; 230 231 alu_clause_tracker(shader &sh); 232 233 void reset(); 234 235 // current group grp()236 alu_group_tracker& grp() { return group ? grp1 : grp0; } 237 // previous group prev_grp()238 alu_group_tracker& prev_grp() { return group ? grp0 : grp1; } 239 240 void emit_group(); 241 void emit_clause(container_node *c); 242 bool check_clause_limits(); 243 void new_group(); 244 bool is_empty(); 245 246 alu_node* create_ar_load(value *v, chan_select ar_channel); 247 248 void discard_current_group(); 249 total_slots()250 unsigned total_slots() { return slot_count; } 251 }; 252 253 class post_scheduler : public pass { 254 255 container_node ready, ready_copies; // alu only 256 container_node pending, bb_pending; 257 bb_node *cur_bb; 258 val_set live; // values live at the end of the alu clause 259 uc_map ucm; 260 alu_clause_tracker alu; 261 262 typedef std::map<sel_chan, value*> rv_map; 263 rv_map regmap, prev_regmap; 264 265 val_set cleared_interf; 266 267 void emit_index_registers(); 268 public: 269 post_scheduler(shader & sh)270 post_scheduler(shader &sh) : pass(sh), 271 ready(), ready_copies(), pending(), cur_bb(), 272 live(), ucm(), alu(sh), regmap(), cleared_interf() {} 273 274 virtual int run(); 275 bool run_on(container_node *n); 276 bool schedule_bb(bb_node *bb); 277 278 void load_index_register(value *v, unsigned idx); 279 void process_fetch(container_node *c); 280 281 bool process_alu(container_node *c); 282 bool schedule_alu(container_node *c); 283 bool prepare_alu_group(); 284 285 void release_op(node *n); 286 287 void release_src_values(node *n); 288 void release_src_vec(vvec &vv, bool src); 289 void release_src_val(value *v); 290 291 void init_uc_val(container_node *c, value *v); 292 void init_uc_vec(container_node *c, vvec &vv, bool src); 293 unsigned init_ucm(container_node *c, node *n); 294 295 void init_regmap(); 296 297 bool check_interferences(); 298 299 unsigned try_add_instruction(node *n); 300 301 bool check_copy(node *n); 302 void dump_group(alu_group_tracker &rt); 303 304 bool unmap_dst(alu_node *n); 305 bool unmap_dst_val(value *d); 306 307 bool map_src(alu_node *n); 308 bool map_src_vec(vvec &vv, bool src); 309 bool map_src_val(value *v); 310 311 bool recolor_local(value *v); 312 313 void update_local_interferences(); 314 void update_live_src_vec(vvec &vv, val_set *born, bool src); 315 void update_live_dst_vec(vvec &vv); 316 void update_live(node *n, val_set *born); 317 void process_group(); 318 319 void set_color_local_val(value *v, sel_chan color); 320 void set_color_local(value *v, sel_chan color); 321 322 void add_interferences(value *v, sb_bitset &rb, val_set &vs); 323 324 void init_globals(val_set &s, bool prealloc); 325 326 void recolor_locals(); 327 328 void dump_regmap(); 329 330 void emit_load_ar(); 331 void emit_clause(); 332 333 void process_ready_copies(); 334 }; 335 336 } // namespace r600_sb 337 338 #endif /* SB_SCHED_H_ */ 339