1 /*
2 * Copyright (C) 2019-2020 Collabora Ltd.
3 * Copyright (C) 2019 Alyssa Rosenzweig
4 * Copyright (C) 2014-2017 Broadcom
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 * SOFTWARE.
24 *
25 */
26
27 #ifndef __PAN_SCOREBOARD_H__
28 #define __PAN_SCOREBOARD_H__
29
30 #include "genxml/gen_macros.h"
31 #include "pan_pool.h"
32
33 struct pan_scoreboard {
34 /* The first job in the batch */
35 mali_ptr first_job;
36
37 /* The number of jobs in the primary batch, essentially */
38 unsigned job_index;
39
40 /* A CPU-side pointer to the previous job for next_job linking */
41 struct mali_job_header_packed *prev_job;
42
43 /* A CPU-side pointer to the first tiler job for dep updates when
44 * injecting a reload tiler job.
45 */
46 struct mali_job_header_packed *first_tiler;
47 uint32_t first_tiler_dep1;
48
49 /* The dependency for tiler jobs (i.e. the index of the last emitted
50 * tiler job, or zero if none have been emitted) */
51 unsigned tiler_dep;
52
53 /* The job index of the WRITE_VALUE job (before it has been created) */
54 unsigned write_value_index;
55 };
56
57 #ifdef PAN_ARCH
58 /*
59 * There are various types of Mali jobs:
60 *
61 * - WRITE_VALUE: generic write primitive, used to zero tiler field
62 * - VERTEX: runs a vertex shader
63 * - TILER: runs tiling and sets up a fragment shader
64 * - FRAGMENT: runs fragment shaders and writes out
65 * - COMPUTE: runs a compute shader
66 * - FUSED: vertex+tiler fused together, implicit intradependency (Bifrost)
67 * - GEOMETRY: runs a geometry shader (unimplemented)
68 * - CACHE_FLUSH: unseen in the wild, theoretically cache flush
69 *
70 * In between a full batch and a single Mali job is the "job chain", a series
71 * of Mali jobs together forming a linked list. Within the job chain, each Mali
72 * job can set (up to) two dependencies on other earlier jobs in the chain.
73 * This dependency graph forms a scoreboard. The general idea of a scoreboard
74 * applies: when there is a data dependency of job B on job A, job B sets one
75 * of its dependency indices to job A, ensuring that job B won't start until
76 * job A finishes.
77 *
78 * More specifically, here are a set of rules:
79 *
80 * - A write value job must appear if and only if there is at least one tiler
81 * job, and tiler jobs must depend on it.
82 *
83 * - Vertex jobs and tiler jobs are independent.
84 *
85 * - A tiler job must have a dependency on its data source. If it's getting
86 * data from a vertex job, it depends on the vertex job. If it's getting data
87 * from software, this is null.
88 *
89 * - Tiler jobs must depend on the write value job (chained or otherwise).
90 *
91 * - Tiler jobs must be strictly ordered. So each tiler job must depend on the
92 * previous job in the chain.
93 *
 * - Linking jobs via next_job has no bearing on the order of execution; it
 *   just establishes the linked list of jobs, EXCEPT:
96 *
97 * - A job's dependencies must appear earlier in the linked list (job chain).
98 *
99 * Justification for each rule:
100 *
101 * - Write value jobs are used to write a zero into a magic tiling field, which
102 * enables tiling to work. If tiling occurs, they are needed; if it does not,
103 * we cannot emit them since then tiling partially occurs and it's bad.
104 *
 * - The hardware has no notion of a combined "vertex/tiler job" (at least not
 *   this hardware; other revisions have fused vertex+tiler jobs, which
 *   complicates matters further). Vertex jobs and tiler jobs are independent
 *   units that take in data, process it, and spit out data.
109 *
110 * - Any job must depend on its data source, in fact, or risk a
111 * read-before-write hazard. Tiler jobs get their data from vertex jobs, ergo
112 * tiler jobs depend on the corresponding vertex job (if it's there).
113 *
114 * - The tiler is not thread-safe; this dependency prevents race conditions
115 * between two different jobs trying to write to the tiler outputs at the
116 * same time.
117 *
118 * - Internally, jobs are scoreboarded; the next job fields just form a linked
119 * list to allow the jobs to be read in; the execution order is from
120 * resolving the dependency fields instead.
121 *
122 * - The hardware cannot set a dependency on a job it doesn't know about yet,
123 * and dependencies are processed in-order of the next job fields.
124 *
125 */
126
127 /* Generates, uploads, and queues a a new job. All fields are written in order
128 * except for next_job accounting (TODO: Should we be clever and defer the
129 * upload of the header here until next job to keep the access pattern totally
130 * linear? Or is that just a micro op at this point?). Returns the generated
131 * index for dep management.
132 *
133 * Inject is used to inject a job at the front, for wallpapering. If you are
134 * not wallpapering and set this, dragons will eat you. */
135
136 static inline unsigned
panfrost_add_job(struct pan_pool * pool,struct pan_scoreboard * scoreboard,enum mali_job_type type,bool barrier,bool suppress_prefetch,unsigned local_dep,unsigned global_dep,const struct panfrost_ptr * job,bool inject)137 panfrost_add_job(struct pan_pool *pool,
138 struct pan_scoreboard *scoreboard,
139 enum mali_job_type type,
140 bool barrier, bool suppress_prefetch,
141 unsigned local_dep, unsigned global_dep,
142 const struct panfrost_ptr *job,
143 bool inject)
144 {
145 if (type == MALI_JOB_TYPE_TILER) {
146 /* Tiler jobs must be chained, and on Midgard, the first tiler
147 * job must depend on the write value job, whose index we
148 * reserve now */
149
150 if (PAN_ARCH <= 5 && !scoreboard->write_value_index)
151 scoreboard->write_value_index = ++scoreboard->job_index;
152
153 if (scoreboard->tiler_dep && !inject)
154 global_dep = scoreboard->tiler_dep;
155 else if (PAN_ARCH <= 5)
156 global_dep = scoreboard->write_value_index;
157 }
158
159 /* Assign the index */
160 unsigned index = ++scoreboard->job_index;
161
162 pan_pack(job->cpu, JOB_HEADER, header) {
163 header.type = type;
164 header.barrier = barrier;
165 header.suppress_prefetch = suppress_prefetch;
166 header.index = index;
167 header.dependency_1 = local_dep;
168 header.dependency_2 = global_dep;
169
170 if (inject)
171 header.next = scoreboard->first_job;
172 }
173
174 if (inject) {
175 assert(type == MALI_JOB_TYPE_TILER && "only for blit shaders");
176
177 if (scoreboard->first_tiler) {
178 /* Manual update of the dep2 field. This is bad,
179 * don't copy this pattern.
180 */
181 scoreboard->first_tiler->opaque[5] =
182 scoreboard->first_tiler_dep1 | (index << 16);
183 }
184
185 scoreboard->first_tiler = (void *)job->cpu;
186 scoreboard->first_tiler_dep1 = local_dep;
187 scoreboard->first_job = job->gpu;
188 return index;
189 }
190
191 /* Form a chain */
192 if (type == MALI_JOB_TYPE_TILER) {
193 if (!scoreboard->first_tiler) {
194 scoreboard->first_tiler = (void *)job->cpu;
195 scoreboard->first_tiler_dep1 = local_dep;
196 }
197 scoreboard->tiler_dep = index;
198 }
199
200 if (scoreboard->prev_job) {
201 /* Manual update of the next pointer. This is bad, don't copy
202 * this pattern.
203 * TODO: Find a way to defer last job header emission until we
204 * have a new job to queue or the batch is ready for execution.
205 */
206 scoreboard->prev_job->opaque[6] = job->gpu;
207 scoreboard->prev_job->opaque[7] = job->gpu >> 32;
208 } else {
209 scoreboard->first_job = job->gpu;
210 }
211
212 scoreboard->prev_job = (struct mali_job_header_packed *)job->cpu;
213 return index;
214 }
215
216 /* Generates a write value job, used to initialize the tiler structures. Note
217 * this is called right before frame submission. */
218
219 static inline struct panfrost_ptr
panfrost_scoreboard_initialize_tiler(struct pan_pool * pool,struct pan_scoreboard * scoreboard,mali_ptr polygon_list)220 panfrost_scoreboard_initialize_tiler(struct pan_pool *pool,
221 struct pan_scoreboard *scoreboard,
222 mali_ptr polygon_list)
223 {
224 struct panfrost_ptr transfer = { 0 };
225
226 /* Check if we even need tiling */
227 if (PAN_ARCH >= 6 || !scoreboard->first_tiler)
228 return transfer;
229
230 /* Okay, we do. Let's generate it. We'll need the job's polygon list
231 * regardless of size. */
232
233 transfer = pan_pool_alloc_desc(pool, WRITE_VALUE_JOB);
234
235 pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, HEADER, header) {
236 header.type = MALI_JOB_TYPE_WRITE_VALUE;
237 header.index = scoreboard->write_value_index;
238 header.next = scoreboard->first_job;
239 }
240
241 pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) {
242 payload.address = polygon_list;
243 payload.type = MALI_WRITE_VALUE_TYPE_ZERO;
244 }
245
246 scoreboard->first_job = transfer.gpu;
247 return transfer;
248 }
249 #endif /* PAN_ARCH */
250
251 #endif
252