• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */
24 
/** @file brw_vec4_cmod_propagation.cpp
 *
 * Really similar to brw_fs_cmod_propagation but adapted to vec4 needs. Check
 * brw_fs_cmod_propagation for further details on the rationale behind this
 * optimization.
 */
31 
32 #include "brw_vec4.h"
33 #include "brw_cfg.h"
34 #include "brw_eu.h"
35 
36 namespace brw {
37 
38 static bool
opt_cmod_propagation_local(bblock_t * block)39 opt_cmod_propagation_local(bblock_t *block)
40 {
41    bool progress = false;
42    int ip = block->end_ip + 1;
43 
44    foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
45       ip--;
46 
47       if ((inst->opcode != BRW_OPCODE_AND &&
48            inst->opcode != BRW_OPCODE_CMP &&
49            inst->opcode != BRW_OPCODE_MOV) ||
50           inst->predicate != BRW_PREDICATE_NONE ||
51           !inst->dst.is_null() ||
52           inst->src[0].file != VGRF ||
53           inst->src[0].abs)
54          continue;
55 
56       if (inst->opcode == BRW_OPCODE_AND &&
57           !(inst->src[1].is_one() &&
58             inst->conditional_mod == BRW_CONDITIONAL_NZ &&
59             !inst->src[0].negate))
60          continue;
61 
62       if (inst->opcode == BRW_OPCODE_CMP && !inst->src[1].is_zero())
63          continue;
64 
65       if (inst->opcode == BRW_OPCODE_MOV &&
66           inst->conditional_mod != BRW_CONDITIONAL_NZ)
67          continue;
68 
69       bool read_flag = false;
70       foreach_inst_in_block_reverse_starting_from(vec4_instruction, scan_inst, inst) {
71          if (regions_overlap(inst->src[0], inst->size_read(0),
72                              scan_inst->dst, scan_inst->size_written)) {
73             if ((scan_inst->predicate && scan_inst->opcode != BRW_OPCODE_SEL) ||
74                 scan_inst->dst.offset != inst->src[0].offset ||
75                 (scan_inst->dst.writemask != WRITEMASK_X &&
76                  scan_inst->dst.writemask != WRITEMASK_XYZW) ||
77                 (scan_inst->dst.writemask == WRITEMASK_XYZW &&
78                  inst->src[0].swizzle != BRW_SWIZZLE_XYZW) ||
79                 (inst->dst.writemask & ~scan_inst->dst.writemask) != 0 ||
80                 scan_inst->exec_size != inst->exec_size ||
81                 scan_inst->group != inst->group) {
82                break;
83             }
84 
85             /* CMP's result is the same regardless of dest type. */
86             if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
87                 scan_inst->opcode == BRW_OPCODE_CMP &&
88                 (inst->dst.type == BRW_REGISTER_TYPE_D ||
89                  inst->dst.type == BRW_REGISTER_TYPE_UD)) {
90                inst->remove(block);
91                progress = true;
92                break;
93             }
94 
95             /* If the AND wasn't handled by the previous case, it isn't safe
96              * to remove it.
97              */
98             if (inst->opcode == BRW_OPCODE_AND)
99                break;
100 
101             /* Comparisons operate differently for ints and floats */
102             if (scan_inst->dst.type != inst->dst.type &&
103                 (scan_inst->dst.type == BRW_REGISTER_TYPE_F ||
104                  inst->dst.type == BRW_REGISTER_TYPE_F))
105                break;
106 
107             /* If the instruction generating inst's source also wrote the
108              * flag, and inst is doing a simple .nz comparison, then inst
109              * is redundant - the appropriate value is already in the flag
110              * register.  Delete inst.
111              */
112             if (inst->conditional_mod == BRW_CONDITIONAL_NZ &&
113                 !inst->src[0].negate &&
114                 scan_inst->writes_flag()) {
115                inst->remove(block);
116                progress = true;
117                break;
118             }
119 
120             /* The conditional mod of the CMP/CMPN instructions behaves
121              * specially because the flag output is not calculated from the
122              * result of the instruction, but the other way around, which
123              * means that even if the condmod to propagate and the condmod
124              * from the CMP instruction are the same they will in general give
125              * different results because they are evaluated based on different
126              * inputs.
127              */
128             if (scan_inst->opcode == BRW_OPCODE_CMP ||
129                 scan_inst->opcode == BRW_OPCODE_CMPN)
130                break;
131 
132             /* From the Sky Lake PRM Vol. 7 "Assigning Conditional Mods":
133              *
134              *    * Note that the [post condition signal] bits generated at
135              *      the output of a compute are before the .sat.
136              */
137             if (scan_inst->saturate)
138                break;
139 
140             /* From the Sky Lake PRM, Vol 2a, "Multiply":
141              *
142              *    "When multiplying integer data types, if one of the sources
143              *    is a DW, the resulting full precision data is stored in
144              *    the accumulator. However, if the destination data type is
145              *    either W or DW, the low bits of the result are written to
146              *    the destination register and the remaining high bits are
147              *    discarded. This results in undefined Overflow and Sign
148              *    flags. Therefore, conditional modifiers and saturation
149              *    (.sat) cannot be used in this case.
150              *
151              * We just disallow cmod propagation on all integer multiplies.
152              */
153             if (!brw_reg_type_is_floating_point(scan_inst->dst.type) &&
154                 scan_inst->opcode == BRW_OPCODE_MUL)
155                break;
156 
157             /* Otherwise, try propagating the conditional. */
158             enum brw_conditional_mod cond =
159                inst->src[0].negate ? brw_swap_cmod(inst->conditional_mod)
160                                    : inst->conditional_mod;
161 
162             if (scan_inst->can_do_cmod() &&
163                 ((!read_flag && scan_inst->conditional_mod == BRW_CONDITIONAL_NONE) ||
164                  scan_inst->conditional_mod == cond)) {
165                scan_inst->conditional_mod = cond;
166                inst->remove(block);
167                progress = true;
168             }
169             break;
170          }
171 
172          if (scan_inst->writes_flag())
173             break;
174 
175          read_flag = read_flag || scan_inst->reads_flag();
176       }
177    }
178 
179    return progress;
180 }
181 
182 bool
opt_cmod_propagation()183 vec4_visitor::opt_cmod_propagation()
184 {
185    bool progress = false;
186 
187    foreach_block_reverse(block, cfg) {
188       progress = opt_cmod_propagation_local(block) || progress;
189    }
190 
191    if (progress)
192       invalidate_live_intervals();
193 
194    return progress;
195 }
196 
197 } /* namespace brw */
198