1 /*
2  * Copyright (C) 2019-2020 Collabora Ltd.
3  * Copyright (C) 2019 Alyssa Rosenzweig
4  * Copyright (C) 2014-2017 Broadcom
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23  * SOFTWARE.
24  *
25  */
26 
27 #ifndef __PAN_JC_H__
28 #define __PAN_JC_H__
29 
30 #include "genxml/gen_macros.h"
31 #include "pan_pool.h"
32 
/*
 * Job chain: bookkeeping for a linked list of Mali jobs while it is being
 * built. pan_jc_add_job() and pan_jc_initialize_tiler() update these fields.
 */
struct pan_jc {
   /* GPU address of the job currently at the head of the chain */
   uint64_t first_job;

   /* Last job index handed out. Bumped once per added job, plus once when
    * the write value job's index is reserved, so it roughly counts jobs. */
   unsigned int job_index;

   /* CPU mapping of the most recently appended (non-injected) job header,
    * kept so its next pointer can be patched when another job is appended */
   struct mali_job_header_packed *prev_job;

   /* CPU mapping of the header of the first tiling job (or of the most
    * recently injected job, which takes its place at the front), together
    * with the local dependency that job was emitted with. Both are needed
    * to rewrite that job's dependency word when a reload tiler job is
    * injected ahead of it.
    */
   struct mali_job_header_packed *first_tiler;
   uint32_t first_tiler_dep1;

   /* Index of the last appended tiling job, which the next tiling job must
    * depend on. Zero while no tiling job has been appended. */
   unsigned int tiler_dep;

   /* Index reserved for the WRITE_VALUE job ahead of its creation. Zero
    * while nothing has been reserved. */
   unsigned int write_value_index;
};
57 
58 #if defined(PAN_ARCH) && PAN_ARCH <= 9
59 /*
60  * There are various types of Mali jobs:
61  *
62  *  - WRITE_VALUE: generic write primitive, used to zero tiler field
63  *  - VERTEX: runs a vertex shader
64  *  - TILER: runs tiling and sets up a fragment shader
65  *  - FRAGMENT: runs fragment shaders and writes out
66  *  - COMPUTE: runs a compute shader
67  *  - FUSED: vertex+tiler fused together, implicit intradependency (Bifrost)
68  *  - GEOMETRY: runs a geometry shader (unimplemented)
69  *  - CACHE_FLUSH: unseen in the wild, theoretically cache flush
70  *
71  * In between a full batch and a single Mali job is the "job chain", a series
72  * of Mali jobs together forming a linked list. Within the job chain, each Mali
73  * job can set (up to) two dependencies on other earlier jobs in the chain.
74  * This dependency graph forms a scoreboard. The general idea of a scoreboard
75  * applies: when there is a data dependency of job B on job A, job B sets one
76  * of its dependency indices to job A, ensuring that job B won't start until
77  * job A finishes.
78  *
79  * More specifically, here are a set of rules:
80  *
81  * - A write value job must appear if and only if there is at least one tiler
82  *   job, and tiler jobs must depend on it.
83  *
84  * - Vertex jobs and tiler jobs are independent.
85  *
86  * - A tiler job must have a dependency on its data source. If it's getting
87  *   data from a vertex job, it depends on the vertex job. If it's getting data
88  *   from software, this is null.
89  *
90  * - Tiler jobs must depend on the write value job (chained or otherwise).
91  *
92  * - Tiler jobs must be strictly ordered. So each tiler job must depend on the
93  *   previous job in the chain.
94  *
 * - Linking jobs via next_job has no bearing on order of execution, rather it
 *   just establishes the linked list of jobs, EXCEPT:
97  *
98  * - A job's dependencies must appear earlier in the linked list (job chain).
99  *
100  * Justification for each rule:
101  *
102  * - Write value jobs are used to write a zero into a magic tiling field, which
103  *   enables tiling to work. If tiling occurs, they are needed; if it does not,
104  *   we cannot emit them since then tiling partially occurs and it's bad.
105  *
 * - The hardware has no notion of a "vertex/tiler job" (at least not our
 *   hardware -- other revisions have fused jobs, which complicates matters
 *   further). They are independent units that take in data, process it, and
 *   spit out data.
110  *
111  * - Any job must depend on its data source, in fact, or risk a
112  *   read-before-write hazard. Tiler jobs get their data from vertex jobs, ergo
113  *   tiler jobs depend on the corresponding vertex job (if it's there).
114  *
115  * - The tiler is not thread-safe; this dependency prevents race conditions
116  *   between two different jobs trying to write to the tiler outputs at the
117  *   same time.
118  *
119  * - Internally, jobs are scoreboarded; the next job fields just form a linked
120  *   list to allow the jobs to be read in; the execution order is from
121  *   resolving the dependency fields instead.
122  *
123  * - The hardware cannot set a dependency on a job it doesn't know about yet,
124  *   and dependencies are processed in-order of the next job fields.
125  *
126  */
127 
128 /**
129  * Does this job use the tiler? Beyond tiler jobs, index-driven vertex shading
130  * jobs also do.
131  */
132 static inline bool
job_uses_tiling(enum mali_job_type type)133 job_uses_tiling(enum mali_job_type type)
134 {
135 #if PAN_ARCH >= 9
136    if (type == MALI_JOB_TYPE_MALLOC_VERTEX)
137       return true;
138 #endif
139 
140 #if PAN_ARCH >= 6
141    if (type == MALI_JOB_TYPE_INDEXED_VERTEX)
142       return true;
143 #endif
144 
145    return (type == MALI_JOB_TYPE_TILER);
146 }
147 
148 /* Generates, uploads, and queues a a new job. All fields are written in order
149  * except for next_job accounting (TODO: Should we be clever and defer the
150  * upload of the header here until next job to keep the access pattern totally
151  * linear? Or is that just a micro op at this point?). Returns the generated
152  * index for dep management.
153  *
154  * Inject is used to inject a job at the front, for wallpapering. If you are
155  * not wallpapering and set this, dragons will eat you. */
156 
157 static inline unsigned
pan_jc_add_job(struct pan_jc * jc,enum mali_job_type type,bool barrier,bool suppress_prefetch,unsigned local_dep,unsigned global_dep,const struct panfrost_ptr * job,bool inject)158 pan_jc_add_job(struct pan_jc *jc, enum mali_job_type type, bool barrier,
159                bool suppress_prefetch, unsigned local_dep, unsigned global_dep,
160                const struct panfrost_ptr *job, bool inject)
161 {
162    if (job_uses_tiling(type)) {
163       /* Tiler jobs must be chained, and on Midgard, the first tiler
164        * job must depend on the write value job, whose index we
165        * reserve now */
166 
167       if (PAN_ARCH <= 5 && !jc->write_value_index)
168          jc->write_value_index = ++jc->job_index;
169 
170       if (jc->tiler_dep && !inject)
171          global_dep = jc->tiler_dep;
172       else if (PAN_ARCH <= 5)
173          global_dep = jc->write_value_index;
174    }
175 
176    /* Assign the index */
177    unsigned index = ++jc->job_index;
178 
179    pan_cast_and_pack(job->cpu, JOB_HEADER, header) {
180       header.type = type;
181       header.barrier = barrier;
182       header.suppress_prefetch = suppress_prefetch;
183       header.index = index;
184       header.dependency_1 = local_dep;
185       header.dependency_2 = global_dep;
186 
187       if (inject)
188          header.next = jc->first_job;
189    }
190 
191    if (inject) {
192       assert(type == MALI_JOB_TYPE_TILER && "only for blit shaders");
193 
194       if (jc->first_tiler) {
195          /* Manual update of the dep2 field. This is bad,
196           * don't copy this pattern.
197           */
198          jc->first_tiler->opaque[5] =
199             jc->first_tiler_dep1 | (index << 16);
200       }
201 
202       jc->first_tiler = (void *)job->cpu;
203       jc->first_tiler_dep1 = local_dep;
204       jc->first_job = job->gpu;
205       return index;
206    }
207 
208    /* Form a chain */
209    if (job_uses_tiling(type)) {
210       if (!jc->first_tiler) {
211          jc->first_tiler = (void *)job->cpu;
212          jc->first_tiler_dep1 = local_dep;
213       }
214       jc->tiler_dep = index;
215    }
216 
217    if (jc->prev_job) {
218       /* Manual update of the next pointer. This is bad, don't copy
219        * this pattern.
220        * TODO: Find a way to defer last job header emission until we
221        * have a new job to queue or the batch is ready for execution.
222        */
223       jc->prev_job->opaque[6] = job->gpu;
224       jc->prev_job->opaque[7] = job->gpu >> 32;
225    } else {
226       jc->first_job = job->gpu;
227    }
228 
229    jc->prev_job = (struct mali_job_header_packed *)job->cpu;
230    return index;
231 }
232 
233 /* Generates a write value job, used to initialize the tiler structures. Note
234  * this is called right before frame submission. */
235 
236 static inline struct panfrost_ptr
pan_jc_initialize_tiler(struct pan_pool * pool,struct pan_jc * jc,uint64_t polygon_list)237 pan_jc_initialize_tiler(struct pan_pool *pool,
238                         struct pan_jc *jc,
239                         uint64_t polygon_list)
240 {
241    struct panfrost_ptr transfer = {0};
242 
243    /* Check if we even need tiling */
244    if (PAN_ARCH >= 6 || !jc->first_tiler)
245       return transfer;
246 
247    /* Okay, we do. Let's generate it. We'll need the job's polygon list
248     * regardless of size. */
249 
250    transfer = pan_pool_alloc_desc(pool, WRITE_VALUE_JOB);
251 
252    pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, HEADER, header) {
253       header.type = MALI_JOB_TYPE_WRITE_VALUE;
254       header.index = jc->write_value_index;
255       header.next = jc->first_job;
256    }
257 
258    pan_section_pack(transfer.cpu, WRITE_VALUE_JOB, PAYLOAD, payload) {
259       payload.address = polygon_list;
260       payload.type = MALI_WRITE_VALUE_TYPE_ZERO;
261    }
262 
263    jc->first_job = transfer.gpu;
264    return transfer;
265 }
266 #endif /* PAN_ARCH */
267 
268 #endif
269