1 /*
2 * Copyright (C) 2022 Collabora Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 #include "va_compiler.h"
25 #include "valhall_enums.h"
26
27 /*
 * Valhall sources may be marked as the last use of a register, according
29 * to the following rules:
30 *
31 * 1. The last use of a register should be marked allowing the hardware
32 * to elide register writes.
33 * 2. Staging sources may be read at any time before the asynchronous
34 * instruction completes. If a register is used as both a staging source and
35 * a regular source, the regular source cannot be marked until the program
36 * waits for the asynchronous instruction.
37 * 3. Marking a register pair marks both registers in the pair.
38 *
39 * Last use information follows immediately from (post-RA) liveness analysis:
40 * a register is dead immediately after its last use.
41 *
42 * Staging information follows from scoreboard analysis: do not mark registers
43 * that are read by a pending asynchronous instruction. Note that the Valhall
44 * scoreboard analysis does not track reads, so we handle that with our own
45 * (simplified) scoreboard analysis.
46 *
47 * Register pairs are marked conservatively: if either register in a pair cannot
48 * be marked, do not mark either register.
49 */
50
51 static uint64_t
bi_staging_read_mask(const bi_instr * I)52 bi_staging_read_mask(const bi_instr *I)
53 {
54 uint64_t mask = 0;
55
56 bi_foreach_src(I, s) {
57 if (bi_is_staging_src(I, s) && !bi_is_null(I->src[s])) {
58 assert(I->src[s].type == BI_INDEX_REGISTER);
59 unsigned reg = I->src[s].value;
60 unsigned count = bi_count_read_registers(I, s);
61
62 mask |= (BITFIELD64_MASK(count) << reg);
63 }
64 }
65
66 return mask;
67 }
68
69 static bool
bi_writes_reg(const bi_instr * I,unsigned reg)70 bi_writes_reg(const bi_instr *I, unsigned reg)
71 {
72 bi_foreach_dest(I, d) {
73 if (bi_is_null(I->dest[d]))
74 continue;
75
76 assert(I->dest[d].type == BI_INDEX_REGISTER);
77
78 unsigned count = bi_count_write_registers(I, d);
79
80 if (reg >= I->dest[d].value && (reg - I->dest[d].value) < count)
81 return true;
82 }
83
84 return false;
85 }
86
87 static unsigned
waits_on_slot(enum va_flow flow,unsigned slot)88 waits_on_slot(enum va_flow flow, unsigned slot)
89 {
90 return (flow == VA_FLOW_WAIT) || (flow == VA_FLOW_WAIT0126) ||
91 (va_flow_is_wait_or_none(flow) && (flow & BITFIELD_BIT(slot)));
92 }
93
94 static void
scoreboard_update(struct bi_scoreboard_state * st,const bi_instr * I)95 scoreboard_update(struct bi_scoreboard_state *st, const bi_instr *I)
96 {
97 /* Mark read staging registers */
98 st->read[I->slot] |= bi_staging_read_mask(I);
99
100 /* Unmark registers after they are waited on */
101 for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i) {
102 if (waits_on_slot(I->flow, i))
103 st->read[i] = 0;
104 }
105 }
106
107 static void
va_analyze_scoreboard_reads(bi_context * ctx)108 va_analyze_scoreboard_reads(bi_context *ctx)
109 {
110 u_worklist worklist;
111 bi_worklist_init(ctx, &worklist);
112
113 bi_foreach_block(ctx, block) {
114 bi_worklist_push_tail(&worklist, block);
115
116 /* Reset analysis from previous pass */
117 block->scoreboard_in = (struct bi_scoreboard_state){ 0 };
118 block->scoreboard_out = (struct bi_scoreboard_state){ 0 };
119 }
120
121 /* Perform forward data flow analysis to calculate dependencies */
122 while (!u_worklist_is_empty(&worklist)) {
123 /* Pop from the front for forward analysis */
124 bi_block *blk = bi_worklist_pop_head(&worklist);
125
126 bi_foreach_predecessor(blk, pred) {
127 for (unsigned i = 0; i < VA_NUM_GENERAL_SLOTS; ++i)
128 blk->scoreboard_in.read[i] |= (*pred)->scoreboard_out.read[i];
129 }
130
131 struct bi_scoreboard_state state = blk->scoreboard_in;
132
133 bi_foreach_instr_in_block(blk, I)
134 scoreboard_update(&state, I);
135
136 /* If there was progress, reprocess successors */
137 if (memcmp(&state, &blk->scoreboard_out, sizeof(state)) != 0) {
138 bi_foreach_successor(blk, succ)
139 bi_worklist_push_tail(&worklist, succ);
140 }
141
142 blk->scoreboard_out = state;
143 }
144
145 u_worklist_fini(&worklist);
146 }
147
148 void
va_mark_last(bi_context * ctx)149 va_mark_last(bi_context *ctx)
150 {
151 /* Analyze the shader globally */
152 bi_postra_liveness(ctx);
153 va_analyze_scoreboard_reads(ctx);
154
155 bi_foreach_block(ctx, block) {
156 uint64_t live = block->reg_live_out;
157
158 /* Mark all last uses */
159 bi_foreach_instr_in_block_rev(block, I) {
160 bi_foreach_src(I, s) {
161 if (I->src[s].type != BI_INDEX_REGISTER)
162 continue;
163
164 unsigned nr = bi_count_read_registers(I, s);
165 uint64_t mask = BITFIELD64_MASK(nr) << I->src[s].value;
166
167 /* If the register dead after this instruction, it's the last use */
168 I->src[s].discard = (live & mask) == 0;
169
170 /* If the register is overwritten this cycle, it is implicitly
171 * discarded, but that won't show up in the liveness analysis.
172 */
173 I->src[s].discard |= bi_writes_reg(I, I->src[s].value);
174 }
175
176 live = bi_postra_liveness_ins(live, I);
177 }
178
179 struct bi_scoreboard_state st = block->scoreboard_in;
180
181 bi_foreach_instr_in_block(block, I) {
182 /* Unmark registers read by a pending async instruction */
183 bi_foreach_src(I, s) {
184 if (!I->src[s].discard)
185 continue;
186
187 assert(I->src[s].type == BI_INDEX_REGISTER);
188
189 uint64_t pending_regs = st.read[0] | st.read[1] | st.read[2];
190 bool pending = (pending_regs & BITFIELD64_BIT(I->src[s].value));
191
192 if (bi_is_staging_src(I, s) || pending)
193 I->src[s].discard = false;
194 }
195
196 /* Unmark register pairs where one half must be preserved */
197 bi_foreach_src(I, s) {
198 /* Only look for "real" architectural registers */
199 if (s >= 3)
200 break;
201
202 if (va_src_info(I->op, s).size == VA_SIZE_64) {
203 bool both_discard = I->src[s].discard && I->src[s + 1].discard;
204
205 I->src[s + 0].discard = both_discard;
206 I->src[s + 1].discard = both_discard;
207 }
208 }
209
210 scoreboard_update(&st, I);
211 }
212 }
213 }
214