1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #define IFC_DEBUG 0 28 29 #if IFC_DEBUG 30 #define IFC_DUMP(q) do { q } while (0) 31 #else 32 #define IFC_DUMP(q) 33 #endif 34 35 #include "sb_shader.h" 36 #include "sb_pass.h" 37 38 namespace r600_sb { 39 run()40 int if_conversion::run() { 41 42 regions_vec &rv = sh.get_regions(); 43 44 unsigned converted = 0; 45 46 for (regions_vec::reverse_iterator N, I = rv.rbegin(), E = rv.rend(); 47 I != E; I = N) { 48 N = I; ++N; 49 50 region_node *r = *I; 51 if (run_on(r)) { 52 rv.erase(I.base() - 1); 53 ++converted; 54 } 55 } 56 return 0; 57 } 58 convert_kill_instructions(region_node * r,value * em,bool branch,container_node * c)59 void if_conversion::convert_kill_instructions(region_node *r, 60 value *em, bool branch, 61 container_node *c) { 62 value *cnd = NULL; 63 64 for (node_iterator I = c->begin(), E = c->end(), N; I != E; I = N) { 65 N = I + 1; 66 67 if (!I->is_alu_inst()) 68 continue; 69 70 alu_node *a = static_cast<alu_node*>(*I); 71 unsigned flags = a->bc.op_ptr->flags; 72 73 if (!(flags & AF_KILL)) 74 continue; 75 76 // ignore predicated or non-const kill instructions 77 if (a->pred || !a->src[0]->is_const() || !a->src[1]->is_const()) 78 continue; 79 80 literal l0 = a->src[0]->literal_value; 81 literal l1 = a->src[1]->literal_value; 82 83 expr_handler::apply_alu_src_mod(a->bc, 0, l0); 84 expr_handler::apply_alu_src_mod(a->bc, 1, l1); 85 86 if (expr_handler::evaluate_condition(flags, l0, l1)) { 87 // kill with constant 'true' condition, we'll convert it to the 88 // conditional kill outside of the if-then-else block 89 90 a->remove(); 91 92 if (!cnd) { 93 cnd = get_select_value_for_em(sh, em); 94 } else { 95 // more than one kill with the same condition, just remove it 96 continue; 97 } 98 99 r->insert_before(a); 100 a->bc.set_op(branch ? ALU_OP2_KILLE_INT : ALU_OP2_KILLNE_INT); 101 102 a->src[0] = cnd; 103 a->src[1] = sh.get_const_value(0); 104 // clear modifiers 105 memset(&a->bc.src[0], 0, sizeof(bc_alu_src)); 106 memset(&a->bc.src[1], 0, sizeof(bc_alu_src)); 107 } else { 108 // kill with constant 'false' condition, this shouldn't happen 109 // but remove it anyway 110 a->remove(); 111 } 112 } 113 } 114 check_and_convert(region_node * r)115 bool if_conversion::check_and_convert(region_node *r) { 116 117 depart_node *nd1 = static_cast<depart_node*>(r->first); 118 if (!nd1->is_depart() || nd1->target != r) 119 return false; 120 if_node *nif = static_cast<if_node*>(nd1->first); 121 if (!nif->is_if()) 122 return false; 123 depart_node *nd2 = static_cast<depart_node*>(nif->first); 124 if (!nd2->is_depart() || nd2->target != r) 125 return false; 126 127 value* &em = nif->cond; 128 129 node_stats s; 130 131 r->collect_stats(s); 132 133 IFC_DUMP( 134 sblog << "ifcvt: region " << r->region_id << " :\n"; 135 s.dump(); 136 ); 137 138 if (s.region_count || s.fetch_count || s.alu_kill_count || 139 s.if_count != 1 || s.repeat_count) 140 return false; 141 142 unsigned real_alu_count = s.alu_count - s.alu_copy_mov_count; 143 144 // if_conversion allows to eliminate JUMP-ALU_POP_AFTER or 145 // JUMP-ALU-ELSE-ALU_POP_AFTER, for now let's assume that 3 CF instructions 146 // are eliminated. According to the docs, cost of CF instruction is 147 // equal to ~40 ALU VLIW instructions (instruction groups), 148 // so we have eliminated cost equal to ~120 groups in total. 149 // Let's also assume that we have avg 3 ALU instructions per group, 150 // This means that potential eliminated cost is about 360 single alu inst. 151 // On the other hand, we are speculatively executing conditional code now, 152 // so we are increasing the cost in some cases. In the worst case, we'll 153 // have to execute real_alu_count additional alu instructions instead of 154 // jumping over them. Let's assume for now that average added cost is 155 // 156 // (0.9 * real_alu_count) 157 // 158 // So we should perform if_conversion if 159 // 160 // (0.9 * real_alu_count) < 360, or 161 // 162 // real_alu_count < 400 163 // 164 // So if real_alu_count is more than 400, than we think that if_conversion 165 // doesn't make sense. 166 167 // FIXME: We can use more precise heuristic, taking into account sizes of 168 // the branches and their probability instead of total size. 169 // Another way to improve this is to consider the number of the groups 170 // instead of the number of instructions (taking into account actual VLIW 171 // packing). 172 // (Currently we don't know anything about packing at this stage, but 173 // probably we can make some more precise estimations anyway) 174 175 if (real_alu_count > 400) 176 return false; 177 178 IFC_DUMP( sblog << "if_cvt: processing...\n"; ); 179 180 value *select = get_select_value_for_em(sh, em); 181 182 if (!select) 183 return false; 184 185 for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; ++I) { 186 node *n = *I; 187 188 alu_node *ns = convert_phi(select, n); 189 190 if (ns) 191 r->insert_after(ns); 192 } 193 194 nd2->expand(); 195 nif->expand(); 196 nd1->expand(); 197 r->expand(); 198 199 return true; 200 } 201 run_on(region_node * r)202 bool if_conversion::run_on(region_node* r) { 203 204 if (r->dep_count() != 2 || r->rep_count() != 1) 205 return false; 206 207 depart_node *nd1 = static_cast<depart_node*>(r->first); 208 if (!nd1->is_depart()) 209 return false; 210 if_node *nif = static_cast<if_node*>(nd1->first); 211 if (!nif->is_if()) 212 return false; 213 depart_node *nd2 = static_cast<depart_node*>(nif->first); 214 if (!nd2->is_depart()) 215 return false; 216 217 value* &em = nif->cond; 218 219 convert_kill_instructions(r, em, true, nd2); 220 convert_kill_instructions(r, em, false, nd1); 221 222 if (check_and_convert(r)) 223 return true; 224 225 if (nd2->empty() && nif->next) { 226 // empty true branch, non-empty false branch 227 // we'll invert it to get rid of 'else' 228 229 assert(em && em->def); 230 231 alu_node *predset = static_cast<alu_node*>(em->def); 232 233 // create clone of PREDSET instruction with inverted condition. 234 // PREDSET has 3 dst operands in our IR (value written to gpr, 235 // predicate value and exec mask value), we'll split it such that 236 // new PREDSET will define exec mask value only, and two others will 237 // be defined in the old PREDSET (if they are not used then DCE will 238 // simply remove old PREDSET). 239 240 alu_node *newpredset = sh.clone(predset); 241 predset->insert_after(newpredset); 242 243 predset->dst[2] = NULL; 244 245 newpredset->dst[0] = NULL; 246 newpredset->dst[1] = NULL; 247 248 em->def = newpredset; 249 250 unsigned cc = newpredset->bc.op_ptr->flags & AF_CC_MASK; 251 unsigned cmptype = newpredset->bc.op_ptr->flags & AF_CMP_TYPE_MASK; 252 bool swapargs = false; 253 254 cc = invert_setcc_condition(cc, swapargs); 255 256 if (swapargs) { 257 std::swap(newpredset->src[0], newpredset->src[1]); 258 std::swap(newpredset->bc.src[0], newpredset->bc.src[1]); 259 } 260 261 unsigned newopcode = get_predsetcc_op(cc, cmptype); 262 newpredset->bc.set_op(newopcode); 263 264 // move the code from the 'false' branch ('else') to the 'true' branch 265 nd2->move(nif->next, NULL); 266 267 // swap phi operands 268 for (node_iterator I = r->phi->begin(), E = r->phi->end(); I != E; 269 ++I) { 270 node *p = *I; 271 assert(p->src.size() == 2); 272 std::swap(p->src[0], p->src[1]); 273 } 274 } 275 276 return false; 277 } 278 convert_phi(value * select,node * phi)279 alu_node* if_conversion::convert_phi(value* select, node* phi) { 280 assert(phi->dst.size() == 1 || phi->src.size() == 2); 281 282 value *d = phi->dst[0]; 283 value *v1 = phi->src[0]; 284 value *v2 = phi->src[1]; 285 286 assert(d); 287 288 if (!d->is_any_gpr()) 289 return NULL; 290 291 if (v1->is_undef()) { 292 if (v2->is_undef()) { 293 return NULL; 294 } else { 295 return sh.create_mov(d, v2); 296 } 297 } else if (v2->is_undef()) 298 return sh.create_mov(d, v1); 299 300 alu_node* n = sh.create_alu(); 301 302 n->bc.set_op(ALU_OP3_CNDE_INT); 303 n->dst.push_back(d); 304 n->src.push_back(select); 305 n->src.push_back(v1); 306 n->src.push_back(v2); 307 308 return n; 309 } 310 311 } // namespace r600_sb 312