• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2014-2015 Etnaviv Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Authors:
24  *    Wladimir J. van der Laan <laanwj@gmail.com>
25  */
26 
27 #include "etnaviv_emit.h"
28 
29 #include "etnaviv_blend.h"
30 #include "etnaviv_compiler.h"
31 #include "etnaviv_context.h"
32 #include "etnaviv_rasterizer.h"
33 #include "etnaviv_resource.h"
34 #include "etnaviv_rs.h"
35 #include "etnaviv_screen.h"
36 #include "etnaviv_shader.h"
37 #include "etnaviv_texture.h"
38 #include "etnaviv_translate.h"
39 #include "etnaviv_uniforms.h"
40 #include "etnaviv_util.h"
41 #include "etnaviv_zsa.h"
42 #include "hw/common.xml.h"
43 #include "hw/state.xml.h"
44 #include "hw/state_blt.xml.h"
45 #include "util/u_math.h"
46 
47 /* Queue a STALL command (queues 2 words) */
48 static inline void
CMD_STALL(struct etna_cmd_stream * stream,uint32_t from,uint32_t to)49 CMD_STALL(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
50 {
51    etna_cmd_stream_emit(stream, VIV_FE_STALL_HEADER_OP_STALL);
52    etna_cmd_stream_emit(stream, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
53 }
54 
55 void
etna_stall(struct etna_cmd_stream * stream,uint32_t from,uint32_t to)56 etna_stall(struct etna_cmd_stream *stream, uint32_t from, uint32_t to)
57 {
58    bool blt = (from == SYNC_RECIPIENT_BLT) || (to == SYNC_RECIPIENT_BLT);
59    etna_cmd_stream_reserve(stream, blt ? 8 : 4);
60 
61    if (blt) {
62       etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
63       etna_cmd_stream_emit(stream, 1);
64    }
65 
66    /* TODO: set bit 28/29 of token after BLT COPY_BUFFER */
67    etna_emit_load_state(stream, VIVS_GL_SEMAPHORE_TOKEN >> 2, 1, 0);
68    etna_cmd_stream_emit(stream, VIVS_GL_SEMAPHORE_TOKEN_FROM(from) | VIVS_GL_SEMAPHORE_TOKEN_TO(to));
69 
70    if (from == SYNC_RECIPIENT_FE) {
71       /* if the frontend is to be stalled, queue a STALL frontend command */
72       CMD_STALL(stream, from, to);
73    } else {
74       /* otherwise, load the STALL token state */
75       etna_emit_load_state(stream, VIVS_GL_STALL_TOKEN >> 2, 1, 0);
76       etna_cmd_stream_emit(stream, VIVS_GL_STALL_TOKEN_FROM(from) | VIVS_GL_STALL_TOKEN_TO(to));
77    }
78 
79    if (blt) {
80       etna_emit_load_state(stream, VIVS_BLT_ENABLE >> 2, 1, 0);
81       etna_cmd_stream_emit(stream, 0);
82    }
83 }
84 
/* State emission helpers.
 *
 * Each macro prefixes the register name with VIVS_ and forwards to the
 * matching coalescing emitter. They rely on two identifiers being in scope at
 * the expansion site: `stream` (the command stream) and `coalesce` (a
 * struct etna_coalesce whose address is passed along). */
#define EMIT_STATE(state_name, src_value) \
   etna_coalsence_emit(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_FIXP(state_name, src_value) \
   etna_coalsence_emit_fixp(stream, &coalesce, VIVS_##state_name, src_value)

#define EMIT_STATE_RELOC(state_name, src_value) \
   etna_coalsence_emit_reloc(stream, &coalesce, VIVS_##state_name, src_value)

#define ETNA_3D_CONTEXT_SIZE  (400) /* keep this number above "Total state updates (fixed)" from gen_weave_state tool */
95 
96 static unsigned
required_stream_size(struct etna_context * ctx)97 required_stream_size(struct etna_context *ctx)
98 {
99    unsigned size = ETNA_3D_CONTEXT_SIZE;
100 
101    /* stall + flush */
102    size += 2 + 4;
103 
104    /* vertex elements */
105    size += ctx->vertex_elements->num_elements + 1;
106 
107    /* uniforms - worst case (2 words per uniform load) */
108    size += ctx->shader.vs->uniforms.count * 2;
109    size += ctx->shader.fs->uniforms.count * 2;
110 
111    /* shader */
112    size += ctx->shader_state.vs_inst_mem_size + 1;
113    size += ctx->shader_state.ps_inst_mem_size + 1;
114 
115    /* DRAW_INDEXED_PRIMITIVES command */
116    size += 6;
117 
118    /* reserve for alignment etc. */
119    size += 64;
120 
121    return size;
122 }
123 
124 /* Emit state that only exists on HALTI5+ */
125 static void
emit_halti5_only_state(struct etna_context * ctx,int vs_output_count)126 emit_halti5_only_state(struct etna_context *ctx, int vs_output_count)
127 {
128    struct etna_cmd_stream *stream = ctx->stream;
129    uint32_t dirty = ctx->dirty;
130    struct etna_coalesce coalesce;
131 
132    etna_coalesce_start(stream, &coalesce);
133    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
134       /* Magic states (load balancing, inter-unit sync, buffers) */
135       /*007C4*/ EMIT_STATE(FE_HALTI5_ID_CONFIG, ctx->shader_state.FE_HALTI5_ID_CONFIG);
136       /*00870*/ EMIT_STATE(VS_HALTI5_OUTPUT_COUNT, vs_output_count | ((vs_output_count * 0x10) << 8));
137       /*008A0*/ EMIT_STATE(VS_HALTI5_UNK008A0, 0x0001000e | ((0x110/vs_output_count) << 20));
138       for (int x = 0; x < 4; ++x) {
139          /*008E0*/ EMIT_STATE(VS_HALTI5_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
140       }
141    }
142    if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
143       for (int x = 0; x < 4; ++x) {
144          /*008C0*/ EMIT_STATE(VS_HALTI5_INPUT(x), ctx->shader_state.VS_INPUT[x]);
145       }
146    }
147    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
148       /*00A90*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
149       /*00A94*/ EMIT_STATE(PA_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
150       /*00AA8*/ EMIT_STATE(PA_VS_OUTPUT_COUNT, vs_output_count);
151       /*01080*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(0), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
152       /*01084*/ EMIT_STATE(PS_VARYING_NUM_COMPONENTS(1), ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
153       /*03888*/ EMIT_STATE(GL_HALTI5_SH_SPECIALS, ctx->shader_state.GL_HALTI5_SH_SPECIALS);
154    }
155    etna_coalesce_end(stream, &coalesce);
156 }
157 
158 /* Emit state that no longer exists on HALTI5 */
159 static void
emit_pre_halti5_state(struct etna_context * ctx)160 emit_pre_halti5_state(struct etna_context *ctx)
161 {
162    struct etna_cmd_stream *stream = ctx->stream;
163    uint32_t dirty = ctx->dirty;
164    struct etna_coalesce coalesce;
165 
166    etna_coalesce_start(stream, &coalesce);
167    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
168       /*00800*/ EMIT_STATE(VS_END_PC, ctx->shader_state.VS_END_PC);
169    }
170    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
171       for (int x = 0; x < 4; ++x) {
172         /*00810*/ EMIT_STATE(VS_OUTPUT(x), ctx->shader_state.VS_OUTPUT[x]);
173       }
174    }
175    if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
176       for (int x = 0; x < 4; ++x) {
177         /*00820*/ EMIT_STATE(VS_INPUT(x), ctx->shader_state.VS_INPUT[x]);
178       }
179    }
180    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
181       /*00838*/ EMIT_STATE(VS_START_PC, ctx->shader_state.VS_START_PC);
182    }
183    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
184       for (int x = 0; x < 10; ++x) {
185          /*00A40*/ EMIT_STATE(PA_SHADER_ATTRIBUTES(x), ctx->shader_state.PA_SHADER_ATTRIBUTES[x]);
186       }
187    }
188    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
189       /*00E04*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E04, ctx->framebuffer.RA_MULTISAMPLE_UNK00E04);
190       for (int x = 0; x < 4; ++x) {
191          /*00E10*/ EMIT_STATE(RA_MULTISAMPLE_UNK00E10(x), ctx->framebuffer.RA_MULTISAMPLE_UNK00E10[x]);
192       }
193       for (int x = 0; x < 16; ++x) {
194          /*00E40*/ EMIT_STATE(RA_CENTROID_TABLE(x), ctx->framebuffer.RA_CENTROID_TABLE[x]);
195       }
196    }
197    if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
198       /*01000*/ EMIT_STATE(PS_END_PC, ctx->shader_state.PS_END_PC);
199    }
200    if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
201       /*01018*/ EMIT_STATE(PS_START_PC, ctx->shader_state.PS_START_PC);
202    }
203    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
204       /*03820*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[0]);
205       for (int x = 0; x < 2; ++x) {
206          /*03828*/ EMIT_STATE(GL_VARYING_COMPONENT_USE(x), ctx->shader_state.GL_VARYING_COMPONENT_USE[x]);
207       }
208       /*03834*/ EMIT_STATE(GL_VARYING_NUM_COMPONENTS2, ctx->shader_state.GL_VARYING_NUM_COMPONENTS[1]);
209    }
210    etna_coalesce_end(stream, &coalesce);
211 }
212 
213 /* Weave state before draw operation. This function merges all the compiled
214  * state blocks under the context into one device register state. Parts of
215  * this state that are changed since last call (dirty) will be uploaded as
216  * state changes in the command buffer. */
217 void
etna_emit_state(struct etna_context * ctx)218 etna_emit_state(struct etna_context *ctx)
219 {
220    struct etna_cmd_stream *stream = ctx->stream;
221    struct etna_screen *screen = ctx->screen;
222    unsigned ccw = ctx->rasterizer->front_ccw;
223 
224 
225    /* Pre-reserve the command buffer space which we are likely to need.
226     * This must cover all the state emitted below, and the following
227     * draw command. */
228    etna_cmd_stream_reserve(stream, required_stream_size(ctx));
229 
230    uint32_t dirty = ctx->dirty;
231 
232    /* Pre-processing: see what caches we need to flush before making state changes. */
233    uint32_t to_flush = 0, to_flush_separate = 0;
234    if (unlikely(dirty & (ETNA_DIRTY_BLEND)))
235       to_flush |= VIVS_GL_FLUSH_CACHE_COLOR;
236    if (unlikely(dirty & ETNA_DIRTY_ZSA))
237       to_flush |= VIVS_GL_FLUSH_CACHE_DEPTH;
238    if (unlikely(dirty & (ETNA_DIRTY_TEXTURE_CACHES))) {
239       to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE;
240       to_flush_separate |= VIVS_GL_FLUSH_CACHE_TEXTUREVS;
241    }
242    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) /* Framebuffer config changed? */
243       to_flush |= VIVS_GL_FLUSH_CACHE_COLOR | VIVS_GL_FLUSH_CACHE_DEPTH;
244    if (DBG_ENABLED(ETNA_DBG_CFLUSH_ALL)) {
245       to_flush |= VIVS_GL_FLUSH_CACHE_TEXTURE | VIVS_GL_FLUSH_CACHE_COLOR |
246                   VIVS_GL_FLUSH_CACHE_DEPTH;
247       to_flush_separate |= VIVS_GL_FLUSH_CACHE_TEXTUREVS;
248    }
249 
250    if (to_flush) {
251       etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush);
252       if (to_flush_separate)
253          etna_set_state(stream, VIVS_GL_FLUSH_CACHE, to_flush_separate);
254       etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
255    }
256 
257    /* Flush TS cache before changing TS configuration. */
258    if (unlikely(dirty & ETNA_DIRTY_TS)) {
259       etna_set_state(stream, VIVS_TS_FLUSH_CACHE, VIVS_TS_FLUSH_CACHE_FLUSH);
260    }
261 
262    /* Update vertex elements. This is different from any of the other states, in that
263     * a) the number of vertex elements written matters: so write only active ones
264     * b) the vertex element states must all be written: do not skip entries that stay the same */
265    if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
266       if (screen->specs.halti >= 5) {
267          /*17800*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG0(0),
268             ctx->vertex_elements->num_elements,
269             ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG0);
270          /*17A00*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_SCALE(0),
271             ctx->vertex_elements->num_elements,
272             ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
273          /*17A80*/ etna_set_state_multi(stream, VIVS_NFE_GENERIC_ATTRIB_CONFIG1(0),
274             ctx->vertex_elements->num_elements,
275             ctx->vertex_elements->NFE_GENERIC_ATTRIB_CONFIG1);
276       } else {
277          /* Special case: vertex elements must always be sent in full if changed */
278          /*00600*/ etna_set_state_multi(stream, VIVS_FE_VERTEX_ELEMENT_CONFIG(0),
279             ctx->vertex_elements->num_elements,
280             ctx->vertex_elements->FE_VERTEX_ELEMENT_CONFIG);
281          if (screen->specs.halti >= 2) {
282             /*00780*/ etna_set_state_multi(stream, VIVS_FE_GENERIC_ATTRIB_SCALE(0),
283                ctx->vertex_elements->num_elements,
284                ctx->vertex_elements->NFE_GENERIC_ATTRIB_SCALE);
285          }
286       }
287    }
288    unsigned vs_output_count = etna_rasterizer_state(ctx->rasterizer)->point_size_per_vertex
289                            ? ctx->shader_state.VS_OUTPUT_COUNT_PSIZE
290                            : ctx->shader_state.VS_OUTPUT_COUNT;
291 
292    /* The following code is originally generated by gen_merge_state.py, to
293     * emit state in increasing order of address (this makes it possible to merge
294     * consecutive register updates into one SET_STATE command)
295     *
296     * There have been some manual changes, where the weaving operation is not
297     * simply bitwise or:
298     * - scissor fixp
299     * - num vertex elements
300     * - scissor handling
301     * - num samplers
302     * - texture lod
303     * - ETNA_DIRTY_TS
304     * - removed ETNA_DIRTY_BASE_SETUP statements -- these are guaranteed to not
305     * change anyway
306     * - PS / framebuffer interaction for MSAA
307     * - move update of GL_MULTI_SAMPLE_CONFIG first
308     * - add unlikely()/likely()
309     */
310    struct etna_coalesce coalesce;
311 
312    etna_coalesce_start(stream, &coalesce);
313 
314    /* begin only EMIT_STATE -- make sure no new etna_reserve calls are done here
315     * directly
316     *    or indirectly */
317    /* multi sample config is set first, and outside of the normal sorting
318     * order, as changing the multisample state clobbers PS.INPUT_COUNT (and
319     * possibly PS.TEMP_REGISTER_CONTROL).
320     */
321    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_SAMPLE_MASK))) {
322       uint32_t val = VIVS_GL_MULTI_SAMPLE_CONFIG_MSAA_ENABLES(ctx->sample_mask);
323       val |= ctx->framebuffer.GL_MULTI_SAMPLE_CONFIG;
324 
325       /*03818*/ EMIT_STATE(GL_MULTI_SAMPLE_CONFIG, val);
326    }
327    if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
328       /*00644*/ EMIT_STATE_RELOC(FE_INDEX_STREAM_BASE_ADDR, &ctx->index_buffer.FE_INDEX_STREAM_BASE_ADDR);
329       /*00648*/ EMIT_STATE(FE_INDEX_STREAM_CONTROL, ctx->index_buffer.FE_INDEX_STREAM_CONTROL);
330    }
331    if (likely(dirty & (ETNA_DIRTY_INDEX_BUFFER))) {
332       /*00674*/ EMIT_STATE(FE_PRIMITIVE_RESTART_INDEX, ctx->index_buffer.FE_PRIMITIVE_RESTART_INDEX);
333    }
334    if (likely(dirty & (ETNA_DIRTY_VERTEX_BUFFERS))) {
335       if (screen->specs.halti >= 2) { /* HALTI2+: NFE_VERTEX_STREAMS */
336          for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
337             /*14600*/ EMIT_STATE_RELOC(NFE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
338          }
339       } else if(screen->specs.stream_count > 1) { /* hw w/ multiple vertex streams */
340          for (int x = 0; x < ctx->vertex_buffer.count; ++x) {
341             /*00680*/ EMIT_STATE_RELOC(FE_VERTEX_STREAMS_BASE_ADDR(x), &ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR);
342          }
343       } else { /* hw w/ single vertex stream */
344          /*0064C*/ EMIT_STATE_RELOC(FE_VERTEX_STREAM_BASE_ADDR, &ctx->vertex_buffer.cvb[0].FE_VERTEX_STREAM_BASE_ADDR);
345       }
346    }
347    /* gallium has instance divisor as part of elements state */
348    if (dirty & (ETNA_DIRTY_VERTEX_ELEMENTS)) {
349       for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) {
350          if (ctx->vertex_buffer.cvb[x].FE_VERTEX_STREAM_BASE_ADDR.bo) {
351             if (screen->specs.halti >= 2)
352                /*14640*/ EMIT_STATE(NFE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_elements->FE_VERTEX_STREAM_CONTROL[x]);
353             else if (screen->specs.stream_count > 1)
354                /*006A0*/ EMIT_STATE(FE_VERTEX_STREAMS_CONTROL(x), ctx->vertex_elements->FE_VERTEX_STREAM_CONTROL[x]);
355             else
356                /*00650*/ EMIT_STATE(FE_VERTEX_STREAM_CONTROL, ctx->vertex_elements->FE_VERTEX_STREAM_CONTROL[0]);
357          }
358       }
359       if (screen->specs.halti >= 2) {
360          for (int x = 0; x < ctx->vertex_elements->num_buffers; ++x) {
361             /*14680*/ EMIT_STATE(NFE_VERTEX_STREAMS_VERTEX_DIVISOR(x), ctx->vertex_elements->NFE_VERTEX_STREAMS_VERTEX_DIVISOR[x]);
362          }
363       }
364    }
365 
366    if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_RASTERIZER))) {
367 
368       /*00804*/ EMIT_STATE(VS_OUTPUT_COUNT, vs_output_count);
369    }
370    if (unlikely(dirty & (ETNA_DIRTY_VERTEX_ELEMENTS | ETNA_DIRTY_SHADER))) {
371       /*00808*/ EMIT_STATE(VS_INPUT_COUNT, ctx->shader_state.VS_INPUT_COUNT);
372       /*0080C*/ EMIT_STATE(VS_TEMP_REGISTER_CONTROL, ctx->shader_state.VS_TEMP_REGISTER_CONTROL);
373    }
374    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
375       /*00830*/ EMIT_STATE(VS_LOAD_BALANCING, ctx->shader_state.VS_LOAD_BALANCING);
376    }
377    if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
378       /*00A00*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_X, ctx->viewport.PA_VIEWPORT_SCALE_X);
379       /*00A04*/ EMIT_STATE_FIXP(PA_VIEWPORT_SCALE_Y, ctx->viewport.PA_VIEWPORT_SCALE_Y);
380       /*00A08*/ EMIT_STATE(PA_VIEWPORT_SCALE_Z, ctx->viewport.PA_VIEWPORT_SCALE_Z);
381       /*00A0C*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_X, ctx->viewport.PA_VIEWPORT_OFFSET_X);
382       /*00A10*/ EMIT_STATE_FIXP(PA_VIEWPORT_OFFSET_Y, ctx->viewport.PA_VIEWPORT_OFFSET_Y);
383       /*00A14*/ EMIT_STATE(PA_VIEWPORT_OFFSET_Z, ctx->viewport.PA_VIEWPORT_OFFSET_Z);
384    }
385    if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
386       struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
387 
388       /*00A18*/ EMIT_STATE(PA_LINE_WIDTH, rasterizer->PA_LINE_WIDTH);
389       /*00A1C*/ EMIT_STATE(PA_POINT_SIZE, rasterizer->PA_POINT_SIZE);
390       /*00A28*/ EMIT_STATE(PA_SYSTEM_MODE, rasterizer->PA_SYSTEM_MODE);
391    }
392    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
393       /*00A30*/ EMIT_STATE(PA_ATTRIBUTE_ELEMENT_COUNT, ctx->shader_state.PA_ATTRIBUTE_ELEMENT_COUNT);
394    }
395    if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_SHADER))) {
396       uint32_t val = etna_rasterizer_state(ctx->rasterizer)->PA_CONFIG;
397       /*00A34*/ EMIT_STATE(PA_CONFIG, val & ctx->shader_state.PA_CONFIG);
398    }
399    if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
400       struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
401       /*00A38*/ EMIT_STATE(PA_WIDE_LINE_WIDTH0, rasterizer->PA_LINE_WIDTH);
402       /*00A3C*/ EMIT_STATE(PA_WIDE_LINE_WIDTH1, rasterizer->PA_LINE_WIDTH);
403    }
404    if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {
405       /*00C00*/ EMIT_STATE_FIXP(SE_SCISSOR_LEFT, ctx->clipping.minx << 16);
406       /*00C04*/ EMIT_STATE_FIXP(SE_SCISSOR_TOP, ctx->clipping.miny << 16);
407       /*00C08*/ EMIT_STATE_FIXP(SE_SCISSOR_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_SCISSOR_MARGIN_RIGHT);
408       /*00C0C*/ EMIT_STATE_FIXP(SE_SCISSOR_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_SCISSOR_MARGIN_BOTTOM);
409    }
410    if (unlikely(dirty & (ETNA_DIRTY_RASTERIZER))) {
411       struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);
412 
413       /*00C10*/ EMIT_STATE(SE_DEPTH_SCALE, rasterizer->SE_DEPTH_SCALE);
414       /*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
415       /*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
416    }
417    if (unlikely(dirty & (ETNA_DIRTY_SCISSOR_CLIP))) {
418       /*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, (ctx->clipping.maxx << 16) + ETNA_SE_CLIP_MARGIN_RIGHT);
419       /*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, (ctx->clipping.maxy << 16) + ETNA_SE_CLIP_MARGIN_BOTTOM);
420    }
421    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
422       /*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
423    }
424    if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
425       /*00E08*/ EMIT_STATE(RA_EARLY_DEPTH, etna_zsa_state(ctx->zsa)->RA_DEPTH_CONFIG);
426    }
427    if (unlikely(dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_FRAMEBUFFER))) {
428       /*01004*/ EMIT_STATE(PS_OUTPUT_REG, ctx->shader_state.PS_OUTPUT_REG);
429       /*01008*/ EMIT_STATE(PS_INPUT_COUNT,
430                            ctx->framebuffer.msaa_mode
431                               ? ctx->shader_state.PS_INPUT_COUNT_MSAA
432                               : ctx->shader_state.PS_INPUT_COUNT);
433       /*0100C*/ EMIT_STATE(PS_TEMP_REGISTER_CONTROL,
434                            ctx->framebuffer.msaa_mode
435                               ? ctx->shader_state.PS_TEMP_REGISTER_CONTROL_MSAA
436                               : ctx->shader_state.PS_TEMP_REGISTER_CONTROL);
437       /*01010*/ EMIT_STATE(PS_CONTROL, ctx->framebuffer.PS_CONTROL);
438       /*01030*/ EMIT_STATE(PS_CONTROL_EXT, ctx->framebuffer.PS_CONTROL_EXT);
439    }
440    if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_FRAMEBUFFER))) {
441       /*01400*/ EMIT_STATE(PE_DEPTH_CONFIG, (etna_zsa_state(ctx->zsa)->PE_DEPTH_CONFIG |
442                                              ctx->framebuffer.PE_DEPTH_CONFIG));
443    }
444    if (unlikely(dirty & (ETNA_DIRTY_VIEWPORT))) {
445       /*01404*/ EMIT_STATE(PE_DEPTH_NEAR, ctx->viewport.PE_DEPTH_NEAR);
446       /*01408*/ EMIT_STATE(PE_DEPTH_FAR, ctx->viewport.PE_DEPTH_FAR);
447    }
448    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
449       /*0140C*/ EMIT_STATE(PE_DEPTH_NORMALIZE, ctx->framebuffer.PE_DEPTH_NORMALIZE);
450 
451       if (screen->specs.halti < 0 || screen->model == 0x880) {
452          /*01410*/ EMIT_STATE_RELOC(PE_DEPTH_ADDR, &ctx->framebuffer.PE_DEPTH_ADDR);
453       }
454 
455       /*01414*/ EMIT_STATE(PE_DEPTH_STRIDE, ctx->framebuffer.PE_DEPTH_STRIDE);
456    }
457 
458    if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
459       uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_OP[ccw];
460       /*01418*/ EMIT_STATE(PE_STENCIL_OP, val);
461    }
462    if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER))) {
463       uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG[ccw];
464       /*0141C*/ EMIT_STATE(PE_STENCIL_CONFIG, val | ctx->stencil_ref.PE_STENCIL_CONFIG[ccw]);
465    }
466    if (unlikely(dirty & (ETNA_DIRTY_ZSA))) {
467       uint32_t val = etna_zsa_state(ctx->zsa)->PE_ALPHA_OP;
468       /*01420*/ EMIT_STATE(PE_ALPHA_OP, val);
469    }
470    if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR))) {
471       /*01424*/ EMIT_STATE(PE_ALPHA_BLEND_COLOR, ctx->blend_color.PE_ALPHA_BLEND_COLOR);
472    }
473    if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
474       uint32_t val = etna_blend_state(ctx->blend)->PE_ALPHA_CONFIG;
475       /*01428*/ EMIT_STATE(PE_ALPHA_CONFIG, val);
476    }
477    if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
478       uint32_t val;
479       /* Use the components and overwrite bits in framebuffer.PE_COLOR_FORMAT
480        * as a mask to enable the bits from blend PE_COLOR_FORMAT */
481       val = ~(VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
482               VIVS_PE_COLOR_FORMAT_OVERWRITE);
483       val |= etna_blend_state(ctx->blend)->PE_COLOR_FORMAT;
484       val &= ctx->framebuffer.PE_COLOR_FORMAT;
485       /*0142C*/ EMIT_STATE(PE_COLOR_FORMAT, val);
486    }
487    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER))) {
488       if (screen->specs.halti >= 0 && screen->model != 0x880) {
489          /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
490          /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
491          /*01460*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(0), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[0]);
492          /*01464*/ EMIT_STATE_RELOC(PE_PIPE_COLOR_ADDR(1), &ctx->framebuffer.PE_PIPE_COLOR_ADDR[1]);
493          /*01480*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(0), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[0]);
494          /*01484*/ EMIT_STATE_RELOC(PE_PIPE_DEPTH_ADDR(1), &ctx->framebuffer.PE_PIPE_DEPTH_ADDR[1]);
495       } else {
496          /*01430*/ EMIT_STATE_RELOC(PE_COLOR_ADDR, &ctx->framebuffer.PE_COLOR_ADDR);
497          /*01434*/ EMIT_STATE(PE_COLOR_STRIDE, ctx->framebuffer.PE_COLOR_STRIDE);
498          /*01454*/ EMIT_STATE(PE_HDEPTH_CONTROL, ctx->framebuffer.PE_HDEPTH_CONTROL);
499       }
500    }
501    if (unlikely(dirty & (ETNA_DIRTY_STENCIL_REF | ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_ZSA))) {
502       uint32_t val = etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT;
503       if (!ctx->zsa->stencil[1].enabled &&
504           ctx->zsa->stencil[0].enabled &&
505           ctx->zsa->stencil[0].valuemask)
506 	  val |= ctx->stencil_ref.PE_STENCIL_CONFIG_EXT[!ccw];
507       else
508 	  val |= ctx->stencil_ref.PE_STENCIL_CONFIG_EXT[ccw];
509       /*014A0*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT, val);
510    }
511    if (unlikely(dirty & (ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER))) {
512       struct etna_blend_state *blend = etna_blend_state(ctx->blend);
513       /*014A4*/ EMIT_STATE(PE_LOGIC_OP, blend->PE_LOGIC_OP | ctx->framebuffer.PE_LOGIC_OP);
514    }
515    if (unlikely(dirty & (ETNA_DIRTY_BLEND))) {
516       struct etna_blend_state *blend = etna_blend_state(ctx->blend);
517       for (int x = 0; x < 2; ++x) {
518          /*014A8*/ EMIT_STATE(PE_DITHER(x), blend->PE_DITHER[x]);
519       }
520    }
521    if (unlikely(dirty & (ETNA_DIRTY_BLEND_COLOR)) &&
522        VIV_FEATURE(screen, chipMinorFeatures1, HALF_FLOAT)) {
523          /*014B0*/ EMIT_STATE(PE_ALPHA_COLOR_EXT0, ctx->blend_color.PE_ALPHA_COLOR_EXT0);
524          /*014B4*/ EMIT_STATE(PE_ALPHA_COLOR_EXT1, ctx->blend_color.PE_ALPHA_COLOR_EXT1);
525    }
526    if (unlikely(dirty & (ETNA_DIRTY_ZSA | ETNA_DIRTY_RASTERIZER))) {
527       /*014B8*/ EMIT_STATE(PE_STENCIL_CONFIG_EXT2, etna_zsa_state(ctx->zsa)->PE_STENCIL_CONFIG_EXT2[ccw]);
528    }
529    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER)) && screen->specs.halti >= 3)
530       /*014BC*/ EMIT_STATE(PE_MEM_CONFIG, ctx->framebuffer.PE_MEM_CONFIG);
531    if (unlikely(dirty & (ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_TS))) {
532       /*01654*/ EMIT_STATE(TS_MEM_CONFIG, ctx->framebuffer.TS_MEM_CONFIG);
533       /*01658*/ EMIT_STATE_RELOC(TS_COLOR_STATUS_BASE, &ctx->framebuffer.TS_COLOR_STATUS_BASE);
534       /*0165C*/ EMIT_STATE_RELOC(TS_COLOR_SURFACE_BASE, &ctx->framebuffer.TS_COLOR_SURFACE_BASE);
535       /*01660*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE, ctx->framebuffer.TS_COLOR_CLEAR_VALUE);
536       /*01664*/ EMIT_STATE_RELOC(TS_DEPTH_STATUS_BASE, &ctx->framebuffer.TS_DEPTH_STATUS_BASE);
537       /*01668*/ EMIT_STATE_RELOC(TS_DEPTH_SURFACE_BASE, &ctx->framebuffer.TS_DEPTH_SURFACE_BASE);
538       /*0166C*/ EMIT_STATE(TS_DEPTH_CLEAR_VALUE, ctx->framebuffer.TS_DEPTH_CLEAR_VALUE);
539       /*016BC*/ EMIT_STATE(TS_COLOR_CLEAR_VALUE_EXT, ctx->framebuffer.TS_COLOR_CLEAR_VALUE_EXT);
540    }
541    if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
542       /*0381C*/ EMIT_STATE(GL_VARYING_TOTAL_COMPONENTS, ctx->shader_state.GL_VARYING_TOTAL_COMPONENTS);
543    }
544    etna_coalesce_end(stream, &coalesce);
545    /* end only EMIT_STATE */
546 
547    /* Emit strongly architecture-specific state */
548    if (screen->specs.halti >= 5)
549       emit_halti5_only_state(ctx, vs_output_count);
550    else
551       emit_pre_halti5_state(ctx);
552 
553    /* Beginning from Halti0 some of the new shader and sampler states are not
554     * self-synchronizing anymore. Thus we need to stall the FE on PE completion
555     * before loading the new states to avoid corrupting the state of the
556     * in-flight draw.
557     */
558    if (screen->specs.halti >= 0 &&
559        (ctx->dirty & (ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF |
560                       ETNA_DIRTY_SAMPLERS | ETNA_DIRTY_SAMPLER_VIEWS)))
561       etna_stall(ctx->stream, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
562 
563    ctx->emit_texture_state(ctx);
564 
565    /* We need to update the uniform cache only if one of the following bits are
566     * set in ctx->dirty:
567     * - ETNA_DIRTY_SHADER
568     * - ETNA_DIRTY_CONSTBUF
569     * - uniforms_dirty_bits
570     *
571     * In case of ETNA_DIRTY_SHADER we need load all uniforms from the cache. In
572     * all
573     * other cases we can load on the changed uniforms.
574     */
575    static const uint32_t uniform_dirty_bits =
576       ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;
577 
578    /**** Large dynamically-sized state ****/
579    bool do_uniform_flush = screen->specs.halti < 5;
580    if (dirty & (ETNA_DIRTY_SHADER)) {
581       /* Special case: a new shader was loaded; simply re-load all uniforms and
582        * shader code at once */
583       /* This sequence is special, do not change ordering unless necessary. According to comment
584          snippets in the Vivante kernel driver a process called "steering" goes on while programming
585          shader state. This (as I understand it) means certain unified states are "steered"
586          toward a specific shader unit (VS/PS/...) based on either explicit flags in register
587          00860, or what other state is written before "auto-steering". So this means some
588          state can legitimately be programmed multiple times.
589        */
590 
591       if (screen->specs.halti >= 5) { /* ICACHE (HALTI5) */
592          assert(ctx->shader_state.VS_INST_ADDR.bo && ctx->shader_state.PS_INST_ADDR.bo);
593          /* Set icache (VS) */
594          etna_set_state(stream, VIVS_VS_NEWRANGE_LOW, 0);
595          etna_set_state(stream, VIVS_VS_NEWRANGE_HIGH, ctx->shader_state.vs_inst_mem_size / 4);
596          assert(ctx->shader_state.VS_INST_ADDR.bo);
597          etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
598          etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
599          etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
600          etna_set_state(stream, VIVS_VS_ICACHE_COUNT, ctx->shader_state.vs_inst_mem_size / 4 - 1);
601 
602          /* Set icache (PS) */
603          etna_set_state(stream, VIVS_PS_NEWRANGE_LOW, 0);
604          etna_set_state(stream, VIVS_PS_NEWRANGE_HIGH, ctx->shader_state.ps_inst_mem_size / 4);
605          assert(ctx->shader_state.PS_INST_ADDR.bo);
606          etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
607          etna_set_state(stream, VIVS_SH_CONFIG, 0x00000002);
608          etna_set_state(stream, VIVS_VS_ICACHE_CONTROL, VIVS_VS_ICACHE_CONTROL_ENABLE);
609          etna_set_state(stream, VIVS_PS_ICACHE_COUNT, ctx->shader_state.ps_inst_mem_size / 4 - 1);
610 
611       } else if (ctx->shader_state.VS_INST_ADDR.bo || ctx->shader_state.PS_INST_ADDR.bo) {
612          /* ICACHE (pre-HALTI5) */
613          assert(screen->specs.has_icache && screen->specs.has_shader_range_registers);
614          /* Set icache (VS) */
615          etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
616          etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
617                VIVS_VS_ICACHE_CONTROL_ENABLE |
618                VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
619          assert(ctx->shader_state.VS_INST_ADDR.bo);
620          etna_set_state_reloc(stream, VIVS_VS_INST_ADDR, &ctx->shader_state.VS_INST_ADDR);
621 
622          /* Set icache (PS) */
623          etna_set_state(stream, VIVS_PS_RANGE, (ctx->shader_state.ps_inst_mem_size / 4 - 1) << 16);
624          etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
625                VIVS_VS_ICACHE_CONTROL_ENABLE |
626                VIVS_VS_ICACHE_CONTROL_FLUSH_PS);
627          assert(ctx->shader_state.PS_INST_ADDR.bo);
628          etna_set_state_reloc(stream, VIVS_PS_INST_ADDR, &ctx->shader_state.PS_INST_ADDR);
629       } else {
630          /* Upload shader directly, first flushing and disabling icache if
631           * supported on this hw */
632          if (screen->specs.has_icache) {
633             etna_set_state(stream, VIVS_VS_ICACHE_CONTROL,
634                   VIVS_VS_ICACHE_CONTROL_FLUSH_PS |
635                   VIVS_VS_ICACHE_CONTROL_FLUSH_VS);
636          }
637          if (screen->specs.has_shader_range_registers) {
638             etna_set_state(stream, VIVS_VS_RANGE, (ctx->shader_state.vs_inst_mem_size / 4 - 1) << 16);
639             etna_set_state(stream, VIVS_PS_RANGE, ((ctx->shader_state.ps_inst_mem_size / 4 - 1 + 0x100) << 16) |
640                                         0x100);
641          }
642          etna_set_state_multi(stream, screen->specs.vs_offset,
643                               ctx->shader_state.vs_inst_mem_size,
644                               ctx->shader_state.VS_INST_MEM);
645          etna_set_state_multi(stream, screen->specs.ps_offset,
646                               ctx->shader_state.ps_inst_mem_size,
647                               ctx->shader_state.PS_INST_MEM);
648       }
649 
650       if (screen->specs.has_unified_uniforms) {
651          etna_set_state(stream, VIVS_VS_UNIFORM_BASE, 0);
652          etna_set_state(stream, VIVS_PS_UNIFORM_BASE, screen->specs.max_vs_uniforms);
653       }
654 
655       if (do_uniform_flush)
656          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
657 
658       etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);
659 
660       if (do_uniform_flush)
661          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
662 
663       etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
664 
665       if (screen->specs.halti >= 5) {
666          /* HALTI5 needs to be prompted to pre-fetch shaders */
667          etna_set_state(stream, VIVS_VS_ICACHE_PREFETCH, 0x00000000);
668          etna_set_state(stream, VIVS_PS_ICACHE_PREFETCH, 0x00000000);
669          etna_stall(stream, SYNC_RECIPIENT_RA, SYNC_RECIPIENT_PE);
670       }
671    } else {
672       /* ideally this cache would only be flushed if there are VS uniform changes */
673       if (do_uniform_flush)
674          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH);
675 
676       if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
677          etna_uniforms_write(ctx, ctx->shader.vs, ctx->constant_buffer[PIPE_SHADER_VERTEX].cb);
678 
679       /* ideally this cache would only be flushed if there are PS uniform changes */
680       if (do_uniform_flush)
681          etna_set_state(stream, VIVS_VS_UNIFORM_CACHE, VIVS_VS_UNIFORM_CACHE_FLUSH | VIVS_VS_UNIFORM_CACHE_PS);
682 
683       if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
684          etna_uniforms_write(ctx, ctx->shader.fs, ctx->constant_buffer[PIPE_SHADER_FRAGMENT].cb);
685    }
686 /**** End of state update ****/
687 #undef EMIT_STATE
688 #undef EMIT_STATE_FIXP
689 #undef EMIT_STATE_RELOC
690    ctx->dirty = 0;
691    ctx->dirty_sampler_views = 0;
692 }
693