• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /****************************************************************************
2  * Copyright (C) 2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  ***************************************************************************/
23 
24 // llvm redefines DEBUG
25 #pragma push_macro("DEBUG")
26 #undef DEBUG
27 #include "JitManager.h"
28 #pragma pop_macro("DEBUG")
29 
30 #include "common/os.h"
31 #include "jit_api.h"
32 #include "state_llvm.h"
33 
34 #include "gallivm/lp_bld_tgsi.h"
35 #include "util/u_format.h"
36 
37 #include "util/u_memory.h"
38 #include "util/u_inlines.h"
39 #include "util/u_helpers.h"
40 #include "util/u_framebuffer.h"
41 #include "util/u_viewport.h"
42 
43 #include "swr_state.h"
44 #include "swr_context.h"
45 #include "swr_context_llvm.h"
46 #include "swr_screen.h"
47 #include "swr_resource.h"
48 #include "swr_tex_sample.h"
49 #include "swr_scratch.h"
50 #include "swr_shader.h"
51 #include "swr_fence.h"
52 
/* These should be pulled out into separate files as necessary.
 * Just initializing everything here to get going. */
55 
56 static void *
swr_create_blend_state(struct pipe_context * pipe,const struct pipe_blend_state * blend)57 swr_create_blend_state(struct pipe_context *pipe,
58                        const struct pipe_blend_state *blend)
59 {
60    struct swr_blend_state *state = CALLOC_STRUCT(swr_blend_state);
61 
62    memcpy(&state->pipe, blend, sizeof(*blend));
63 
64    struct pipe_blend_state *pipe_blend = &state->pipe;
65 
66    for (int target = 0;
67         target < std::min(SWR_NUM_RENDERTARGETS, PIPE_MAX_COLOR_BUFS);
68         target++) {
69 
70       struct pipe_rt_blend_state *rt_blend = &pipe_blend->rt[target];
71       SWR_RENDER_TARGET_BLEND_STATE &blendState =
72          state->blendState.renderTarget[target];
73       RENDER_TARGET_BLEND_COMPILE_STATE &compileState =
74          state->compileState[target];
75 
76       if (target != 0 && !pipe_blend->independent_blend_enable) {
77          memcpy(&compileState,
78                 &state->compileState[0],
79                 sizeof(RENDER_TARGET_BLEND_COMPILE_STATE));
80          continue;
81       }
82 
83       compileState.blendEnable = rt_blend->blend_enable;
84       if (compileState.blendEnable) {
85          compileState.sourceAlphaBlendFactor =
86             swr_convert_blend_factor(rt_blend->alpha_src_factor);
87          compileState.destAlphaBlendFactor =
88             swr_convert_blend_factor(rt_blend->alpha_dst_factor);
89          compileState.sourceBlendFactor =
90             swr_convert_blend_factor(rt_blend->rgb_src_factor);
91          compileState.destBlendFactor =
92             swr_convert_blend_factor(rt_blend->rgb_dst_factor);
93 
94          compileState.colorBlendFunc =
95             swr_convert_blend_func(rt_blend->rgb_func);
96          compileState.alphaBlendFunc =
97             swr_convert_blend_func(rt_blend->alpha_func);
98       }
99       compileState.logicOpEnable = state->pipe.logicop_enable;
100       if (compileState.logicOpEnable) {
101          compileState.logicOpFunc =
102             swr_convert_logic_op(state->pipe.logicop_func);
103       }
104 
105       blendState.writeDisableRed =
106          (rt_blend->colormask & PIPE_MASK_R) ? 0 : 1;
107       blendState.writeDisableGreen =
108          (rt_blend->colormask & PIPE_MASK_G) ? 0 : 1;
109       blendState.writeDisableBlue =
110          (rt_blend->colormask & PIPE_MASK_B) ? 0 : 1;
111       blendState.writeDisableAlpha =
112          (rt_blend->colormask & PIPE_MASK_A) ? 0 : 1;
113 
114       if (rt_blend->colormask == 0)
115          compileState.blendEnable = false;
116    }
117 
118    return state;
119 }
120 
121 static void
swr_bind_blend_state(struct pipe_context * pipe,void * blend)122 swr_bind_blend_state(struct pipe_context *pipe, void *blend)
123 {
124    struct swr_context *ctx = swr_context(pipe);
125 
126    if (ctx->blend == blend)
127       return;
128 
129    ctx->blend = (swr_blend_state *)blend;
130 
131    ctx->dirty |= SWR_NEW_BLEND;
132 }
133 
134 static void
swr_delete_blend_state(struct pipe_context * pipe,void * blend)135 swr_delete_blend_state(struct pipe_context *pipe, void *blend)
136 {
137    FREE(blend);
138 }
139 
140 static void
swr_set_blend_color(struct pipe_context * pipe,const struct pipe_blend_color * color)141 swr_set_blend_color(struct pipe_context *pipe,
142                     const struct pipe_blend_color *color)
143 {
144    struct swr_context *ctx = swr_context(pipe);
145 
146    ctx->blend_color = *color;
147 
148    ctx->dirty |= SWR_NEW_BLEND;
149 }
150 
151 static void
swr_set_stencil_ref(struct pipe_context * pipe,const struct pipe_stencil_ref * ref)152 swr_set_stencil_ref(struct pipe_context *pipe,
153                     const struct pipe_stencil_ref *ref)
154 {
155    struct swr_context *ctx = swr_context(pipe);
156 
157    ctx->stencil_ref = *ref;
158 
159    ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
160 }
161 
162 static void *
swr_create_depth_stencil_state(struct pipe_context * pipe,const struct pipe_depth_stencil_alpha_state * depth_stencil)163 swr_create_depth_stencil_state(
164    struct pipe_context *pipe,
165    const struct pipe_depth_stencil_alpha_state *depth_stencil)
166 {
167    struct pipe_depth_stencil_alpha_state *state;
168 
169    state = (pipe_depth_stencil_alpha_state *)mem_dup(depth_stencil,
170                                                      sizeof *depth_stencil);
171 
172    return state;
173 }
174 
175 static void
swr_bind_depth_stencil_state(struct pipe_context * pipe,void * depth_stencil)176 swr_bind_depth_stencil_state(struct pipe_context *pipe, void *depth_stencil)
177 {
178    struct swr_context *ctx = swr_context(pipe);
179 
180    if (ctx->depth_stencil == (pipe_depth_stencil_alpha_state *)depth_stencil)
181       return;
182 
183    ctx->depth_stencil = (pipe_depth_stencil_alpha_state *)depth_stencil;
184 
185    ctx->dirty |= SWR_NEW_DEPTH_STENCIL_ALPHA;
186 }
187 
188 static void
swr_delete_depth_stencil_state(struct pipe_context * pipe,void * depth)189 swr_delete_depth_stencil_state(struct pipe_context *pipe, void *depth)
190 {
191    FREE(depth);
192 }
193 
194 
195 static void *
swr_create_rasterizer_state(struct pipe_context * pipe,const struct pipe_rasterizer_state * rast)196 swr_create_rasterizer_state(struct pipe_context *pipe,
197                             const struct pipe_rasterizer_state *rast)
198 {
199    struct pipe_rasterizer_state *state;
200    state = (pipe_rasterizer_state *)mem_dup(rast, sizeof *rast);
201 
202    return state;
203 }
204 
205 static void
swr_bind_rasterizer_state(struct pipe_context * pipe,void * handle)206 swr_bind_rasterizer_state(struct pipe_context *pipe, void *handle)
207 {
208    struct swr_context *ctx = swr_context(pipe);
209    const struct pipe_rasterizer_state *rasterizer =
210       (const struct pipe_rasterizer_state *)handle;
211 
212    if (ctx->rasterizer == (pipe_rasterizer_state *)rasterizer)
213       return;
214 
215    ctx->rasterizer = (pipe_rasterizer_state *)rasterizer;
216 
217    ctx->dirty |= SWR_NEW_RASTERIZER;
218 }
219 
220 static void
swr_delete_rasterizer_state(struct pipe_context * pipe,void * rasterizer)221 swr_delete_rasterizer_state(struct pipe_context *pipe, void *rasterizer)
222 {
223    FREE(rasterizer);
224 }
225 
226 
227 static void *
swr_create_sampler_state(struct pipe_context * pipe,const struct pipe_sampler_state * sampler)228 swr_create_sampler_state(struct pipe_context *pipe,
229                          const struct pipe_sampler_state *sampler)
230 {
231    struct pipe_sampler_state *state =
232       (pipe_sampler_state *)mem_dup(sampler, sizeof *sampler);
233 
234    return state;
235 }
236 
237 static void
swr_bind_sampler_states(struct pipe_context * pipe,enum pipe_shader_type shader,unsigned start,unsigned num,void ** samplers)238 swr_bind_sampler_states(struct pipe_context *pipe,
239                         enum pipe_shader_type shader,
240                         unsigned start,
241                         unsigned num,
242                         void **samplers)
243 {
244    struct swr_context *ctx = swr_context(pipe);
245    unsigned i;
246 
247    assert(shader < PIPE_SHADER_TYPES);
248    assert(start + num <= ARRAY_SIZE(ctx->samplers[shader]));
249 
250    /* set the new samplers */
251    ctx->num_samplers[shader] = num;
252    for (i = 0; i < num; i++) {
253       ctx->samplers[shader][start + i] = (pipe_sampler_state *)samplers[i];
254    }
255 
256    ctx->dirty |= SWR_NEW_SAMPLER;
257 }
258 
259 static void
swr_delete_sampler_state(struct pipe_context * pipe,void * sampler)260 swr_delete_sampler_state(struct pipe_context *pipe, void *sampler)
261 {
262    FREE(sampler);
263 }
264 
265 
266 static struct pipe_sampler_view *
swr_create_sampler_view(struct pipe_context * pipe,struct pipe_resource * texture,const struct pipe_sampler_view * templ)267 swr_create_sampler_view(struct pipe_context *pipe,
268                         struct pipe_resource *texture,
269                         const struct pipe_sampler_view *templ)
270 {
271    struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
272 
273    if (view) {
274       *view = *templ;
275       view->reference.count = 1;
276       view->texture = NULL;
277       pipe_resource_reference(&view->texture, texture);
278       view->context = pipe;
279    }
280 
281    return view;
282 }
283 
284 static void
swr_set_sampler_views(struct pipe_context * pipe,enum pipe_shader_type shader,unsigned start,unsigned num,struct pipe_sampler_view ** views)285 swr_set_sampler_views(struct pipe_context *pipe,
286                       enum pipe_shader_type shader,
287                       unsigned start,
288                       unsigned num,
289                       struct pipe_sampler_view **views)
290 {
291    struct swr_context *ctx = swr_context(pipe);
292    uint i;
293 
294    assert(num <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
295 
296    assert(shader < PIPE_SHADER_TYPES);
297    assert(start + num <= ARRAY_SIZE(ctx->sampler_views[shader]));
298 
299    /* set the new sampler views */
300    ctx->num_sampler_views[shader] = num;
301    for (i = 0; i < num; i++) {
302       /* Note: we're using pipe_sampler_view_release() here to work around
303        * a possible crash when the old view belongs to another context that
304        * was already destroyed.
305        */
306       pipe_sampler_view_release(pipe, &ctx->sampler_views[shader][start + i]);
307       pipe_sampler_view_reference(&ctx->sampler_views[shader][start + i],
308                                   views[i]);
309    }
310 
311    ctx->dirty |= SWR_NEW_SAMPLER_VIEW;
312 }
313 
314 static void
swr_sampler_view_destroy(struct pipe_context * pipe,struct pipe_sampler_view * view)315 swr_sampler_view_destroy(struct pipe_context *pipe,
316                          struct pipe_sampler_view *view)
317 {
318    pipe_resource_reference(&view->texture, NULL);
319    FREE(view);
320 }
321 
322 static void *
swr_create_vs_state(struct pipe_context * pipe,const struct pipe_shader_state * vs)323 swr_create_vs_state(struct pipe_context *pipe,
324                     const struct pipe_shader_state *vs)
325 {
326    struct swr_vertex_shader *swr_vs = new swr_vertex_shader;
327    if (!swr_vs)
328       return NULL;
329 
330    swr_vs->pipe.tokens = tgsi_dup_tokens(vs->tokens);
331    swr_vs->pipe.stream_output = vs->stream_output;
332 
333    lp_build_tgsi_info(vs->tokens, &swr_vs->info);
334 
335    swr_vs->soState = {0};
336 
337    if (swr_vs->pipe.stream_output.num_outputs) {
338       pipe_stream_output_info *stream_output = &swr_vs->pipe.stream_output;
339 
340       swr_vs->soState.soEnable = true;
341       // soState.rasterizerDisable set on state dirty
342       // soState.streamToRasterizer not used
343 
344       for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
345          swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
346             1 << (stream_output->output[i].register_index - 1);
347       }
348       for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
349         swr_vs->soState.streamNumEntries[i] =
350              _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
351        }
352    }
353 
354    return swr_vs;
355 }
356 
357 static void
swr_bind_vs_state(struct pipe_context * pipe,void * vs)358 swr_bind_vs_state(struct pipe_context *pipe, void *vs)
359 {
360    struct swr_context *ctx = swr_context(pipe);
361 
362    if (ctx->vs == vs)
363       return;
364 
365    ctx->vs = (swr_vertex_shader *)vs;
366    ctx->dirty |= SWR_NEW_VS;
367 }
368 
369 static void
swr_delete_vs_state(struct pipe_context * pipe,void * vs)370 swr_delete_vs_state(struct pipe_context *pipe, void *vs)
371 {
372    struct swr_vertex_shader *swr_vs = (swr_vertex_shader *)vs;
373    FREE((void *)swr_vs->pipe.tokens);
374    struct swr_screen *screen = swr_screen(pipe->screen);
375 
376    /* Defer deletion of vs state */
377    swr_fence_work_delete_vs(screen->flush_fence, swr_vs);
378 }
379 
380 static void *
swr_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * fs)381 swr_create_fs_state(struct pipe_context *pipe,
382                     const struct pipe_shader_state *fs)
383 {
384    struct swr_fragment_shader *swr_fs = new swr_fragment_shader;
385    if (!swr_fs)
386       return NULL;
387 
388    swr_fs->pipe.tokens = tgsi_dup_tokens(fs->tokens);
389 
390    lp_build_tgsi_info(fs->tokens, &swr_fs->info);
391 
392    return swr_fs;
393 }
394 
395 
396 static void
swr_bind_fs_state(struct pipe_context * pipe,void * fs)397 swr_bind_fs_state(struct pipe_context *pipe, void *fs)
398 {
399    struct swr_context *ctx = swr_context(pipe);
400 
401    if (ctx->fs == fs)
402       return;
403 
404    ctx->fs = (swr_fragment_shader *)fs;
405    ctx->dirty |= SWR_NEW_FS;
406 }
407 
408 static void
swr_delete_fs_state(struct pipe_context * pipe,void * fs)409 swr_delete_fs_state(struct pipe_context *pipe, void *fs)
410 {
411    struct swr_fragment_shader *swr_fs = (swr_fragment_shader *)fs;
412    FREE((void *)swr_fs->pipe.tokens);
413    struct swr_screen *screen = swr_screen(pipe->screen);
414 
415    /* Defer deleton of fs state */
416    swr_fence_work_delete_fs(screen->flush_fence, swr_fs);
417 }
418 
419 
420 static void
swr_set_constant_buffer(struct pipe_context * pipe,uint shader,uint index,const struct pipe_constant_buffer * cb)421 swr_set_constant_buffer(struct pipe_context *pipe,
422                         uint shader,
423                         uint index,
424                         const struct pipe_constant_buffer *cb)
425 {
426    struct swr_context *ctx = swr_context(pipe);
427    struct pipe_resource *constants = cb ? cb->buffer : NULL;
428 
429    assert(shader < PIPE_SHADER_TYPES);
430    assert(index < ARRAY_SIZE(ctx->constants[shader]));
431 
432    /* note: reference counting */
433    util_copy_constant_buffer(&ctx->constants[shader][index], cb);
434 
435    if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_GEOMETRY) {
436       ctx->dirty |= SWR_NEW_VSCONSTANTS;
437    } else if (shader == PIPE_SHADER_FRAGMENT) {
438       ctx->dirty |= SWR_NEW_FSCONSTANTS;
439    }
440 
441    if (cb && cb->user_buffer) {
442       pipe_resource_reference(&constants, NULL);
443    }
444 }
445 
446 
447 static void *
swr_create_vertex_elements_state(struct pipe_context * pipe,unsigned num_elements,const struct pipe_vertex_element * attribs)448 swr_create_vertex_elements_state(struct pipe_context *pipe,
449                                  unsigned num_elements,
450                                  const struct pipe_vertex_element *attribs)
451 {
452    struct swr_vertex_element_state *velems;
453    assert(num_elements <= PIPE_MAX_ATTRIBS);
454    velems = CALLOC_STRUCT(swr_vertex_element_state);
455    if (velems) {
456       velems->fsState.bVertexIDOffsetEnable = true;
457       velems->fsState.numAttribs = num_elements;
458       for (unsigned i = 0; i < num_elements; i++) {
459          // XXX: we should do this keyed on the VS usage info
460 
461          const struct util_format_description *desc =
462             util_format_description(attribs[i].src_format);
463 
464          velems->fsState.layout[i].AlignedByteOffset = attribs[i].src_offset;
465          velems->fsState.layout[i].Format =
466             mesa_to_swr_format(attribs[i].src_format);
467          velems->fsState.layout[i].StreamIndex =
468             attribs[i].vertex_buffer_index;
469          velems->fsState.layout[i].InstanceEnable =
470             attribs[i].instance_divisor != 0;
471          velems->fsState.layout[i].ComponentControl0 =
472             desc->channel[0].type != UTIL_FORMAT_TYPE_VOID
473             ? ComponentControl::StoreSrc
474             : ComponentControl::Store0;
475          velems->fsState.layout[i].ComponentControl1 =
476             desc->channel[1].type != UTIL_FORMAT_TYPE_VOID
477             ? ComponentControl::StoreSrc
478             : ComponentControl::Store0;
479          velems->fsState.layout[i].ComponentControl2 =
480             desc->channel[2].type != UTIL_FORMAT_TYPE_VOID
481             ? ComponentControl::StoreSrc
482             : ComponentControl::Store0;
483          velems->fsState.layout[i].ComponentControl3 =
484             desc->channel[3].type != UTIL_FORMAT_TYPE_VOID
485             ? ComponentControl::StoreSrc
486             : ComponentControl::Store1Fp;
487          velems->fsState.layout[i].ComponentPacking = ComponentEnable::XYZW;
488          velems->fsState.layout[i].InstanceDataStepRate =
489             attribs[i].instance_divisor;
490 
491          /* Calculate the pitch of each stream */
492          const SWR_FORMAT_INFO &swr_desc = GetFormatInfo(
493             mesa_to_swr_format(attribs[i].src_format));
494          velems->stream_pitch[attribs[i].vertex_buffer_index] += swr_desc.Bpp;
495 
496          if (attribs[i].instance_divisor != 0) {
497             velems->instanced_bufs |= 1U << attribs[i].vertex_buffer_index;
498             uint32_t *min_instance_div =
499                &velems->min_instance_div[attribs[i].vertex_buffer_index];
500             if (!*min_instance_div ||
501                 attribs[i].instance_divisor < *min_instance_div)
502                *min_instance_div = attribs[i].instance_divisor;
503          }
504       }
505    }
506 
507    return velems;
508 }
509 
510 static void
swr_bind_vertex_elements_state(struct pipe_context * pipe,void * velems)511 swr_bind_vertex_elements_state(struct pipe_context *pipe, void *velems)
512 {
513    struct swr_context *ctx = swr_context(pipe);
514    struct swr_vertex_element_state *swr_velems =
515       (struct swr_vertex_element_state *)velems;
516 
517    ctx->velems = swr_velems;
518    ctx->dirty |= SWR_NEW_VERTEX;
519 }
520 
521 static void
swr_delete_vertex_elements_state(struct pipe_context * pipe,void * velems)522 swr_delete_vertex_elements_state(struct pipe_context *pipe, void *velems)
523 {
524    /* XXX Need to destroy fetch shader? */
525    FREE(velems);
526 }
527 
528 
529 static void
swr_set_vertex_buffers(struct pipe_context * pipe,unsigned start_slot,unsigned num_elements,const struct pipe_vertex_buffer * buffers)530 swr_set_vertex_buffers(struct pipe_context *pipe,
531                        unsigned start_slot,
532                        unsigned num_elements,
533                        const struct pipe_vertex_buffer *buffers)
534 {
535    struct swr_context *ctx = swr_context(pipe);
536 
537    assert(num_elements <= PIPE_MAX_ATTRIBS);
538 
539    util_set_vertex_buffers_count(ctx->vertex_buffer,
540                                  &ctx->num_vertex_buffers,
541                                  buffers,
542                                  start_slot,
543                                  num_elements);
544 
545    ctx->dirty |= SWR_NEW_VERTEX;
546 }
547 
548 
549 static void
swr_set_index_buffer(struct pipe_context * pipe,const struct pipe_index_buffer * ib)550 swr_set_index_buffer(struct pipe_context *pipe,
551                      const struct pipe_index_buffer *ib)
552 {
553    struct swr_context *ctx = swr_context(pipe);
554 
555    if (ib)
556       memcpy(&ctx->index_buffer, ib, sizeof(ctx->index_buffer));
557    else
558       memset(&ctx->index_buffer, 0, sizeof(ctx->index_buffer));
559 
560    ctx->dirty |= SWR_NEW_VERTEX;
561 }
562 
563 static void
swr_set_polygon_stipple(struct pipe_context * pipe,const struct pipe_poly_stipple * stipple)564 swr_set_polygon_stipple(struct pipe_context *pipe,
565                         const struct pipe_poly_stipple *stipple)
566 {
567    struct swr_context *ctx = swr_context(pipe);
568 
569    ctx->poly_stipple = *stipple; /* struct copy */
570    ctx->dirty |= SWR_NEW_STIPPLE;
571 }
572 
573 static void
swr_set_clip_state(struct pipe_context * pipe,const struct pipe_clip_state * clip)574 swr_set_clip_state(struct pipe_context *pipe,
575                    const struct pipe_clip_state *clip)
576 {
577    struct swr_context *ctx = swr_context(pipe);
578 
579    ctx->clip = *clip;
580    /* XXX Unimplemented, but prevents crash */
581 
582    ctx->dirty |= SWR_NEW_CLIP;
583 }
584 
585 
586 static void
swr_set_scissor_states(struct pipe_context * pipe,unsigned start_slot,unsigned num_viewports,const struct pipe_scissor_state * scissor)587 swr_set_scissor_states(struct pipe_context *pipe,
588                        unsigned start_slot,
589                        unsigned num_viewports,
590                        const struct pipe_scissor_state *scissor)
591 {
592    struct swr_context *ctx = swr_context(pipe);
593 
594    ctx->scissor = *scissor;
595    ctx->swr_scissor.xmin = scissor->minx;
596    ctx->swr_scissor.xmax = scissor->maxx;
597    ctx->swr_scissor.ymin = scissor->miny;
598    ctx->swr_scissor.ymax = scissor->maxy;
599    ctx->dirty |= SWR_NEW_SCISSOR;
600 }
601 
602 static void
swr_set_viewport_states(struct pipe_context * pipe,unsigned start_slot,unsigned num_viewports,const struct pipe_viewport_state * vpt)603 swr_set_viewport_states(struct pipe_context *pipe,
604                         unsigned start_slot,
605                         unsigned num_viewports,
606                         const struct pipe_viewport_state *vpt)
607 {
608    struct swr_context *ctx = swr_context(pipe);
609 
610    ctx->viewport = *vpt;
611    ctx->dirty |= SWR_NEW_VIEWPORT;
612 }
613 
614 
615 static void
swr_set_framebuffer_state(struct pipe_context * pipe,const struct pipe_framebuffer_state * fb)616 swr_set_framebuffer_state(struct pipe_context *pipe,
617                           const struct pipe_framebuffer_state *fb)
618 {
619    struct swr_context *ctx = swr_context(pipe);
620 
621    boolean changed = !util_framebuffer_state_equal(&ctx->framebuffer, fb);
622 
623    assert(fb->width <= KNOB_GUARDBAND_WIDTH);
624    assert(fb->height <= KNOB_GUARDBAND_HEIGHT);
625 
626    if (changed) {
627       util_copy_framebuffer_state(&ctx->framebuffer, fb);
628 
629       ctx->dirty |= SWR_NEW_FRAMEBUFFER;
630    }
631 }
632 
633 
634 static void
swr_set_sample_mask(struct pipe_context * pipe,unsigned sample_mask)635 swr_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
636 {
637    struct swr_context *ctx = swr_context(pipe);
638 
639    if (sample_mask != ctx->sample_mask) {
640       ctx->sample_mask = sample_mask;
641       ctx->dirty |= SWR_NEW_RASTERIZER;
642    }
643 }
644 
645 /*
646  * Update resource in-use status
647  * All resources bound to color or depth targets marked as WRITE resources.
648  * VBO Vertex/index buffers and texture views marked as READ resources.
649  */
650 void
swr_update_resource_status(struct pipe_context * pipe,const struct pipe_draw_info * p_draw_info)651 swr_update_resource_status(struct pipe_context *pipe,
652                            const struct pipe_draw_info *p_draw_info)
653 {
654    struct swr_context *ctx = swr_context(pipe);
655    struct pipe_framebuffer_state *fb = &ctx->framebuffer;
656 
657    /* colorbuffer targets */
658    if (fb->nr_cbufs)
659       for (uint32_t i = 0; i < fb->nr_cbufs; ++i)
660          if (fb->cbufs[i])
661             swr_resource_write(fb->cbufs[i]->texture);
662 
663    /* depth/stencil target */
664    if (fb->zsbuf)
665       swr_resource_write(fb->zsbuf->texture);
666 
667    /* VBO vertex buffers */
668    for (uint32_t i = 0; i < ctx->num_vertex_buffers; i++) {
669       struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
670       if (!vb->user_buffer)
671          swr_resource_read(vb->buffer);
672    }
673 
674    /* VBO index buffer */
675    if (p_draw_info && p_draw_info->indexed) {
676       struct pipe_index_buffer *ib = &ctx->index_buffer;
677       if (!ib->user_buffer)
678          swr_resource_read(ib->buffer);
679    }
680 
681    /* transform feedback buffers */
682    for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
683       struct pipe_stream_output_target *target = ctx->so_targets[i];
684       if (target && target->buffer)
685          swr_resource_write(target->buffer);
686    }
687 
688    /* texture sampler views */
689    for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
690       for (uint32_t i = 0; i < ctx->num_sampler_views[j]; i++) {
691          struct pipe_sampler_view *view = ctx->sampler_views[j][i];
692          if (view)
693             swr_resource_read(view->texture);
694       }
695    }
696 
697    /* constant buffers */
698    for (uint32_t j : {PIPE_SHADER_VERTEX, PIPE_SHADER_FRAGMENT}) {
699       for (uint32_t i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
700          struct pipe_constant_buffer *cb = &ctx->constants[j][i];
701          if (cb->buffer)
702             swr_resource_read(cb->buffer);
703       }
704    }
705 }
706 
707 static void
swr_update_texture_state(struct swr_context * ctx,unsigned shader_type,unsigned num_sampler_views,swr_jit_texture * textures)708 swr_update_texture_state(struct swr_context *ctx,
709                          unsigned shader_type,
710                          unsigned num_sampler_views,
711                          swr_jit_texture *textures)
712 {
713    for (unsigned i = 0; i < num_sampler_views; i++) {
714       struct pipe_sampler_view *view =
715          ctx->sampler_views[shader_type][i];
716       struct swr_jit_texture *jit_tex = &textures[i];
717 
718       memset(jit_tex, 0, sizeof(*jit_tex));
719       if (view) {
720          struct pipe_resource *res = view->texture;
721          struct swr_resource *swr_res = swr_resource(res);
722          SWR_SURFACE_STATE *swr = &swr_res->swr;
723          size_t *mip_offsets = swr_res->mip_offsets;
724          if (swr_res->has_depth && swr_res->has_stencil &&
725             !util_format_has_depth(util_format_description(view->format))) {
726             swr = &swr_res->secondary;
727             mip_offsets = swr_res->secondary_mip_offsets;
728          }
729 
730          jit_tex->width = res->width0;
731          jit_tex->height = res->height0;
732          jit_tex->base_ptr = swr->pBaseAddress;
733          if (view->target != PIPE_BUFFER) {
734             jit_tex->first_level = view->u.tex.first_level;
735             jit_tex->last_level = view->u.tex.last_level;
736             if (view->target == PIPE_TEXTURE_3D)
737                jit_tex->depth = res->depth0;
738             else
739                jit_tex->depth =
740                   view->u.tex.last_layer - view->u.tex.first_layer + 1;
741             jit_tex->base_ptr += view->u.tex.first_layer *
742                swr->qpitch * swr->pitch;
743          } else {
744             unsigned view_blocksize = util_format_get_blocksize(view->format);
745             jit_tex->base_ptr += view->u.buf.offset;
746             jit_tex->width = view->u.buf.size / view_blocksize;
747             jit_tex->depth = 1;
748          }
749 
750          for (unsigned level = jit_tex->first_level;
751               level <= jit_tex->last_level;
752               level++) {
753             jit_tex->row_stride[level] = swr->pitch;
754             jit_tex->img_stride[level] = swr->qpitch * swr->pitch;
755             jit_tex->mip_offsets[level] = mip_offsets[level];
756          }
757       }
758    }
759 }
760 
761 static void
swr_update_sampler_state(struct swr_context * ctx,unsigned shader_type,unsigned num_samplers,swr_jit_sampler * samplers)762 swr_update_sampler_state(struct swr_context *ctx,
763                          unsigned shader_type,
764                          unsigned num_samplers,
765                          swr_jit_sampler *samplers)
766 {
767    for (unsigned i = 0; i < num_samplers; i++) {
768       const struct pipe_sampler_state *sampler =
769          ctx->samplers[shader_type][i];
770 
771       if (sampler) {
772          samplers[i].min_lod = sampler->min_lod;
773          samplers[i].max_lod = sampler->max_lod;
774          samplers[i].lod_bias = sampler->lod_bias;
775          COPY_4V(samplers[i].border_color, sampler->border_color.f);
776       }
777    }
778 }
779 
780 static void
swr_update_constants(struct swr_context * ctx,enum pipe_shader_type shaderType)781 swr_update_constants(struct swr_context *ctx, enum pipe_shader_type shaderType)
782 {
783    swr_draw_context *pDC = &ctx->swrDC;
784 
785    const float **constant;
786    uint32_t *num_constants;
787    struct swr_scratch_space *scratch;
788 
789    switch (shaderType) {
790    case PIPE_SHADER_VERTEX:
791       constant = pDC->constantVS;
792       num_constants = pDC->num_constantsVS;
793       scratch = &ctx->scratch->vs_constants;
794       break;
795    case PIPE_SHADER_FRAGMENT:
796       constant = pDC->constantFS;
797       num_constants = pDC->num_constantsFS;
798       scratch = &ctx->scratch->fs_constants;
799       break;
800    default:
801       debug_printf("Unsupported shader type constants\n");
802       return;
803    }
804 
805    for (UINT i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
806       const pipe_constant_buffer *cb = &ctx->constants[shaderType][i];
807       num_constants[i] = cb->buffer_size;
808       if (cb->buffer) {
809          constant[i] =
810             (const float *)(swr_resource_data(cb->buffer) +
811                             cb->buffer_offset);
812       } else {
813          /* Need to copy these constants to scratch space */
814          if (cb->user_buffer && cb->buffer_size) {
815             const void *ptr =
816                ((const uint8_t *)cb->user_buffer + cb->buffer_offset);
817             uint32_t size = AlignUp(cb->buffer_size, 4);
818             ptr = swr_copy_to_scratch_space(ctx, scratch, ptr, size);
819             constant[i] = (const float *)ptr;
820          }
821       }
822    }
823 }
824 
825 static bool
swr_change_rt(struct swr_context * ctx,unsigned attachment,const struct pipe_surface * sf)826 swr_change_rt(struct swr_context *ctx,
827               unsigned attachment,
828               const struct pipe_surface *sf)
829 {
830    swr_draw_context *pDC = &ctx->swrDC;
831    struct SWR_SURFACE_STATE *rt = &pDC->renderTargets[attachment];
832 
833    /* Do nothing if the render target hasn't changed */
834    if ((!sf || !sf->texture) && rt->pBaseAddress == nullptr)
835       return false;
836 
837    /* Deal with disabling RT up front */
838    if (!sf || !sf->texture) {
839       /* If detaching attachment, mark tiles as RESOLVED so core
840        * won't try to load from non-existent target. */
841       swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_RESOLVED);
842       *rt = {0};
843       return true;
844    }
845 
846    const struct swr_resource *swr = swr_resource(sf->texture);
847    const SWR_SURFACE_STATE *swr_surface = &swr->swr;
848    SWR_FORMAT fmt = mesa_to_swr_format(sf->format);
849 
850    if (attachment == SWR_ATTACHMENT_STENCIL && swr->secondary.pBaseAddress) {
851       swr_surface = &swr->secondary;
852       fmt = swr_surface->format;
853    }
854 
855    if (rt->pBaseAddress == swr_surface->pBaseAddress &&
856        rt->format == fmt &&
857        rt->lod == sf->u.tex.level &&
858        rt->arrayIndex == sf->u.tex.first_layer)
859       return false;
860 
861    bool need_fence = false;
862 
863    /* StoreTile for changed target */
864    if (rt->pBaseAddress) {
865       /* If changing attachment to a new target, mark tiles as
866        * INVALID so they are reloaded from surface. */
867       swr_store_render_target(&ctx->pipe, attachment, SWR_TILE_INVALID);
868       need_fence = true;
869    }
870 
871    /* Make new attachment */
872    *rt = *swr_surface;
873    rt->format = fmt;
874    rt->lod = sf->u.tex.level;
875    rt->arrayIndex = sf->u.tex.first_layer;
876 
877    return need_fence;
878 }
879 
880 static inline void
swr_user_vbuf_range(const struct pipe_draw_info * info,const struct swr_vertex_element_state * velems,const struct pipe_vertex_buffer * vb,uint32_t i,uint32_t * totelems,uint32_t * base,uint32_t * size)881 swr_user_vbuf_range(const struct pipe_draw_info *info,
882                     const struct swr_vertex_element_state *velems,
883                     const struct pipe_vertex_buffer *vb,
884                     uint32_t i,
885                     uint32_t *totelems,
886                     uint32_t *base,
887                     uint32_t *size)
888 {
889    /* FIXME: The size is too large - we don't access the full extra stride. */
890    unsigned elems;
891    if (velems->instanced_bufs & (1U << i)) {
892       elems = info->instance_count / velems->min_instance_div[i] + 1;
893       *totelems = info->start_instance + elems;
894       *base = info->start_instance * vb->stride;
895       *size = elems * vb->stride;
896    } else if (vb->stride) {
897       elems = info->max_index - info->min_index + 1;
898       *totelems = info->max_index + 1;
899       *base = info->min_index * vb->stride;
900       *size = elems * vb->stride;
901    } else {
902       *totelems = 1;
903       *base = 0;
904       *size = velems->stream_pitch[i];
905    }
906 }
907 
908 void
swr_update_derived(struct pipe_context * pipe,const struct pipe_draw_info * p_draw_info)909 swr_update_derived(struct pipe_context *pipe,
910                    const struct pipe_draw_info *p_draw_info)
911 {
912    struct swr_context *ctx = swr_context(pipe);
913    struct swr_screen *screen = swr_screen(pipe->screen);
914 
915    /* Update screen->pipe to current pipe context. */
916    if (screen->pipe != pipe)
917       screen->pipe = pipe;
918 
919    /* Any state that requires dirty flags to be re-triggered sets this mask */
920    /* For example, user_buffer vertex and index buffers. */
921    unsigned post_update_dirty_flags = 0;
922 
923    /* Render Targets */
924    if (ctx->dirty & SWR_NEW_FRAMEBUFFER) {
925       struct pipe_framebuffer_state *fb = &ctx->framebuffer;
926       const struct util_format_description *desc = NULL;
927       bool need_fence = false;
928 
929       /* colorbuffer targets */
930       if (fb->nr_cbufs) {
931          for (unsigned i = 0; i < fb->nr_cbufs; ++i)
932             need_fence |= swr_change_rt(
933                   ctx, SWR_ATTACHMENT_COLOR0 + i, fb->cbufs[i]);
934       }
935       for (unsigned i = fb->nr_cbufs; i < SWR_NUM_RENDERTARGETS; ++i)
936          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_COLOR0 + i, NULL);
937 
938       /* depth/stencil target */
939       if (fb->zsbuf)
940          desc = util_format_description(fb->zsbuf->format);
941       if (fb->zsbuf && util_format_has_depth(desc))
942          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, fb->zsbuf);
943       else
944          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_DEPTH, NULL);
945 
946       if (fb->zsbuf && util_format_has_stencil(desc))
947          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, fb->zsbuf);
948       else
949          need_fence |= swr_change_rt(ctx, SWR_ATTACHMENT_STENCIL, NULL);
950 
951       /* This fence ensures any attachment changes are resolved before the
952        * next draw */
953       if (need_fence)
954          swr_fence_submit(ctx, screen->flush_fence);
955    }
956 
957    /* Raster state */
958    if (ctx->dirty & (SWR_NEW_RASTERIZER |
959                      SWR_NEW_VS | // clipping
960                      SWR_NEW_FRAMEBUFFER)) {
961       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
962       pipe_framebuffer_state *fb = &ctx->framebuffer;
963 
964       SWR_RASTSTATE *rastState = &ctx->derived.rastState;
965       rastState->cullMode = swr_convert_cull_mode(rasterizer->cull_face);
966       rastState->frontWinding = rasterizer->front_ccw
967          ? SWR_FRONTWINDING_CCW
968          : SWR_FRONTWINDING_CW;
969       rastState->scissorEnable = rasterizer->scissor;
970       rastState->pointSize = rasterizer->point_size > 0.0f
971          ? rasterizer->point_size
972          : 1.0f;
973       rastState->lineWidth = rasterizer->line_width > 0.0f
974          ? rasterizer->line_width
975          : 1.0f;
976 
977       rastState->pointParam = rasterizer->point_size_per_vertex;
978 
979       rastState->pointSpriteEnable = rasterizer->sprite_coord_enable;
980       rastState->pointSpriteTopOrigin =
981          rasterizer->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT;
982 
983       /* XXX TODO: Add multisample */
984       rastState->msaaRastEnable = false;
985       rastState->rastMode = SWR_MSAA_RASTMODE_OFF_PIXEL;
986       rastState->sampleCount = SWR_MULTISAMPLE_1X;
987       rastState->forcedSampleCount = false;
988 
989       bool do_offset = false;
990       switch (rasterizer->fill_front) {
991       case PIPE_POLYGON_MODE_FILL:
992          do_offset = rasterizer->offset_tri;
993          break;
994       case PIPE_POLYGON_MODE_LINE:
995          do_offset = rasterizer->offset_line;
996          break;
997       case PIPE_POLYGON_MODE_POINT:
998          do_offset = rasterizer->offset_point;
999          break;
1000       }
1001 
1002       if (do_offset) {
1003          rastState->depthBias = rasterizer->offset_units;
1004          rastState->slopeScaledDepthBias = rasterizer->offset_scale;
1005          rastState->depthBiasClamp = rasterizer->offset_clamp;
1006       } else {
1007          rastState->depthBias = 0;
1008          rastState->slopeScaledDepthBias = 0;
1009          rastState->depthBiasClamp = 0;
1010       }
1011       struct pipe_surface *zb = fb->zsbuf;
1012       if (zb && swr_resource(zb->texture)->has_depth)
1013          rastState->depthFormat = swr_resource(zb->texture)->swr.format;
1014 
1015       rastState->depthClipEnable = rasterizer->depth_clip;
1016       rastState->clipHalfZ = rasterizer->clip_halfz;
1017 
1018       rastState->clipDistanceMask =
1019          ctx->vs->info.base.num_written_clipdistance ?
1020          ctx->vs->info.base.clipdist_writemask & rasterizer->clip_plane_enable :
1021          rasterizer->clip_plane_enable;
1022 
1023       rastState->cullDistanceMask =
1024          ctx->vs->info.base.culldist_writemask << ctx->vs->info.base.num_written_clipdistance;
1025 
1026       SwrSetRastState(ctx->swrContext, rastState);
1027    }
1028 
1029    /* Scissor */
1030    if (ctx->dirty & SWR_NEW_SCISSOR) {
1031       SwrSetScissorRects(ctx->swrContext, 1, &ctx->swr_scissor);
1032    }
1033 
1034    /* Viewport */
1035    if (ctx->dirty & (SWR_NEW_VIEWPORT | SWR_NEW_FRAMEBUFFER
1036                      | SWR_NEW_RASTERIZER)) {
1037       pipe_viewport_state *state = &ctx->viewport;
1038       pipe_framebuffer_state *fb = &ctx->framebuffer;
1039       pipe_rasterizer_state *rasterizer = ctx->rasterizer;
1040 
1041       SWR_VIEWPORT *vp = &ctx->derived.vp;
1042       SWR_VIEWPORT_MATRICES *vpm = &ctx->derived.vpm;
1043 
1044       vp->x = state->translate[0] - state->scale[0];
1045       vp->width = 2 * state->scale[0];
1046       vp->y = state->translate[1] - fabs(state->scale[1]);
1047       vp->height = 2 * fabs(state->scale[1]);
1048       util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
1049                               &vp->minZ, &vp->maxZ);
1050 
1051       vpm->m00[0] = state->scale[0];
1052       vpm->m11[0] = state->scale[1];
1053       vpm->m22[0] = state->scale[2];
1054       vpm->m30[0] = state->translate[0];
1055       vpm->m31[0] = state->translate[1];
1056       vpm->m32[0] = state->translate[2];
1057 
1058       /* Now that the matrix is calculated, clip the view coords to screen
1059        * size.  OpenGL allows for -ve x,y in the viewport. */
1060       if (vp->x < 0.0f) {
1061          vp->width += vp->x;
1062          vp->x = 0.0f;
1063       }
1064       if (vp->y < 0.0f) {
1065          vp->height += vp->y;
1066          vp->y = 0.0f;
1067       }
1068       vp->width = std::min(vp->width, (float)fb->width - vp->x);
1069       vp->height = std::min(vp->height, (float)fb->height - vp->y);
1070 
1071       SwrSetViewports(ctx->swrContext, 1, vp, vpm);
1072    }
1073 
1074    /* Set vertex & index buffers */
1075    /* (using draw info if called by swr_draw_vbo) */
1076    if (ctx->dirty & SWR_NEW_VERTEX) {
1077       uint32_t scratch_total;
1078       uint8_t *scratch = NULL;
1079 
1080       /* If being called by swr_draw_vbo, copy draw details */
1081       struct pipe_draw_info info = {0};
1082       if (p_draw_info)
1083          info = *p_draw_info;
1084 
1085       /* We must get all the scratch space in one go */
1086       scratch_total = 0;
1087       for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1088          struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1089 
1090          if (!vb->user_buffer)
1091             continue;
1092 
1093          uint32_t elems, base, size;
1094          swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1095          scratch_total += AlignUp(size, 4);
1096       }
1097 
1098       if (scratch_total) {
1099          scratch = (uint8_t *)swr_copy_to_scratch_space(
1100                ctx, &ctx->scratch->vertex_buffer, NULL, scratch_total);
1101       }
1102 
1103       /* vertex buffers */
1104       SWR_VERTEX_BUFFER_STATE swrVertexBuffers[PIPE_MAX_ATTRIBS];
1105       for (UINT i = 0; i < ctx->num_vertex_buffers; i++) {
1106          uint32_t size, pitch, elems, partial_inbounds;
1107          const uint8_t *p_data;
1108          struct pipe_vertex_buffer *vb = &ctx->vertex_buffer[i];
1109 
1110          pitch = vb->stride;
1111          if (!vb->user_buffer) {
1112             /* VBO
1113              * size is based on buffer->width0 rather than info.max_index
1114              * to prevent having to validate VBO on each draw */
1115             size = vb->buffer->width0;
1116             elems = size / pitch;
1117             partial_inbounds = size % pitch;
1118 
1119             p_data = swr_resource_data(vb->buffer) + vb->buffer_offset;
1120          } else {
1121             /* Client buffer
1122              * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1123              * revalidate on each draw */
1124             post_update_dirty_flags |= SWR_NEW_VERTEX;
1125 
1126             uint32_t base;
1127             swr_user_vbuf_range(&info, ctx->velems, vb, i, &elems, &base, &size);
1128             partial_inbounds = 0;
1129 
1130             /* Copy only needed vertices to scratch space */
1131             size = AlignUp(size, 4);
1132             const void *ptr = (const uint8_t *) vb->user_buffer + base;
1133             memcpy(scratch, ptr, size);
1134             ptr = scratch;
1135             scratch += size;
1136             p_data = (const uint8_t *)ptr - base;
1137          }
1138 
1139          swrVertexBuffers[i] = {0};
1140          swrVertexBuffers[i].index = i;
1141          swrVertexBuffers[i].pitch = pitch;
1142          swrVertexBuffers[i].pData = p_data;
1143          swrVertexBuffers[i].size = size;
1144          swrVertexBuffers[i].maxVertex = elems;
1145          swrVertexBuffers[i].partialInboundsSize = partial_inbounds;
1146       }
1147 
1148       SwrSetVertexBuffers(
1149          ctx->swrContext, ctx->num_vertex_buffers, swrVertexBuffers);
1150 
1151       /* index buffer, if required (info passed in by swr_draw_vbo) */
1152       SWR_FORMAT index_type = R32_UINT; /* Default for non-indexed draws */
1153       if (info.indexed) {
1154          const uint8_t *p_data;
1155          uint32_t size, pitch;
1156          struct pipe_index_buffer *ib = &ctx->index_buffer;
1157 
1158          pitch = ib->index_size ? ib->index_size : sizeof(uint32_t);
1159          index_type = swr_convert_index_type(pitch);
1160 
1161          if (!ib->user_buffer) {
1162             /* VBO
1163              * size is based on buffer->width0 rather than info.count
1164              * to prevent having to validate VBO on each draw */
1165             size = ib->buffer->width0;
1166             p_data = swr_resource_data(ib->buffer) + ib->offset;
1167          } else {
1168             /* Client buffer
1169              * client memory is one-time use, re-trigger SWR_NEW_VERTEX to
1170              * revalidate on each draw */
1171             post_update_dirty_flags |= SWR_NEW_VERTEX;
1172 
1173             size = info.count * pitch;
1174             size = AlignUp(size, 4);
1175 
1176             /* Copy indices to scratch space */
1177             const void *ptr = ib->user_buffer;
1178             ptr = swr_copy_to_scratch_space(
1179                ctx, &ctx->scratch->index_buffer, ptr, size);
1180             p_data = (const uint8_t *)ptr;
1181          }
1182 
1183          SWR_INDEX_BUFFER_STATE swrIndexBuffer;
1184          swrIndexBuffer.format = swr_convert_index_type(ib->index_size);
1185          swrIndexBuffer.pIndices = p_data;
1186          swrIndexBuffer.size = size;
1187 
1188          SwrSetIndexBuffer(ctx->swrContext, &swrIndexBuffer);
1189       }
1190 
1191       struct swr_vertex_element_state *velems = ctx->velems;
1192       if (velems && velems->fsState.indexType != index_type) {
1193          velems->fsFunc = NULL;
1194          velems->fsState.indexType = index_type;
1195       }
1196    }
1197 
1198    /* VertexShader */
1199    if (ctx->dirty & (SWR_NEW_VS |
1200                      SWR_NEW_RASTERIZER | // for clip planes
1201                      SWR_NEW_SAMPLER |
1202                      SWR_NEW_SAMPLER_VIEW |
1203                      SWR_NEW_FRAMEBUFFER)) {
1204       swr_jit_vs_key key;
1205       swr_generate_vs_key(key, ctx, ctx->vs);
1206       auto search = ctx->vs->map.find(key);
1207       PFN_VERTEX_FUNC func;
1208       if (search != ctx->vs->map.end()) {
1209          func = search->second->shader;
1210       } else {
1211          func = swr_compile_vs(ctx, key);
1212       }
1213       SwrSetVertexFunc(ctx->swrContext, func);
1214 
1215       /* JIT sampler state */
1216       if (ctx->dirty & SWR_NEW_SAMPLER) {
1217          swr_update_sampler_state(ctx,
1218                                   PIPE_SHADER_VERTEX,
1219                                   key.nr_samplers,
1220                                   ctx->swrDC.samplersVS);
1221       }
1222 
1223       /* JIT sampler view state */
1224       if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1225          swr_update_texture_state(ctx,
1226                                   PIPE_SHADER_VERTEX,
1227                                   key.nr_sampler_views,
1228                                   ctx->swrDC.texturesVS);
1229       }
1230    }
1231 
1232    /* FragmentShader */
1233    if (ctx->dirty & (SWR_NEW_FS | SWR_NEW_SAMPLER | SWR_NEW_SAMPLER_VIEW
1234                      | SWR_NEW_RASTERIZER | SWR_NEW_FRAMEBUFFER)) {
1235       swr_jit_fs_key key;
1236       swr_generate_fs_key(key, ctx, ctx->fs);
1237       auto search = ctx->fs->map.find(key);
1238       PFN_PIXEL_KERNEL func;
1239       if (search != ctx->fs->map.end()) {
1240          func = search->second->shader;
1241       } else {
1242          func = swr_compile_fs(ctx, key);
1243       }
1244       SWR_PS_STATE psState = {0};
1245       psState.pfnPixelShader = func;
1246       psState.killsPixel = ctx->fs->info.base.uses_kill;
1247       psState.inputCoverage = SWR_INPUT_COVERAGE_NORMAL;
1248       psState.writesODepth = ctx->fs->info.base.writes_z;
1249       psState.usesSourceDepth = ctx->fs->info.base.reads_z;
1250       psState.shadingRate = SWR_SHADING_RATE_PIXEL; // XXX
1251       psState.numRenderTargets = ctx->framebuffer.nr_cbufs;
1252       psState.posOffset = SWR_PS_POSITION_SAMPLE_NONE; // XXX msaa
1253       uint32_t barycentricsMask = 0;
1254 #if 0
1255       // when we switch to mesa-master
1256       if (ctx->fs->info.base.uses_persp_center ||
1257           ctx->fs->info.base.uses_linear_center)
1258          barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1259       if (ctx->fs->info.base.uses_persp_centroid ||
1260           ctx->fs->info.base.uses_linear_centroid)
1261          barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1262       if (ctx->fs->info.base.uses_persp_sample ||
1263           ctx->fs->info.base.uses_linear_sample)
1264          barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1265 #else
1266       for (unsigned i = 0; i < ctx->fs->info.base.num_inputs; i++) {
1267          switch (ctx->fs->info.base.input_interpolate_loc[i]) {
1268          case TGSI_INTERPOLATE_LOC_CENTER:
1269             barycentricsMask |= SWR_BARYCENTRIC_PER_PIXEL_MASK;
1270             break;
1271          case TGSI_INTERPOLATE_LOC_CENTROID:
1272             barycentricsMask |= SWR_BARYCENTRIC_CENTROID_MASK;
1273             break;
1274          case TGSI_INTERPOLATE_LOC_SAMPLE:
1275             barycentricsMask |= SWR_BARYCENTRIC_PER_SAMPLE_MASK;
1276             break;
1277          }
1278       }
1279 #endif
1280       psState.barycentricsMask = barycentricsMask;
1281       psState.usesUAV = false; // XXX
1282       psState.forceEarlyZ = false;
1283       SwrSetPixelShaderState(ctx->swrContext, &psState);
1284 
1285       /* JIT sampler state */
1286       if (ctx->dirty & SWR_NEW_SAMPLER) {
1287          swr_update_sampler_state(ctx,
1288                                   PIPE_SHADER_FRAGMENT,
1289                                   key.nr_samplers,
1290                                   ctx->swrDC.samplersFS);
1291       }
1292 
1293       /* JIT sampler view state */
1294       if (ctx->dirty & (SWR_NEW_SAMPLER_VIEW | SWR_NEW_FRAMEBUFFER)) {
1295          swr_update_texture_state(ctx,
1296                                   PIPE_SHADER_FRAGMENT,
1297                                   key.nr_sampler_views,
1298                                   ctx->swrDC.texturesFS);
1299       }
1300    }
1301 
1302 
1303    /* VertexShader Constants */
1304    if (ctx->dirty & SWR_NEW_VSCONSTANTS) {
1305       swr_update_constants(ctx, PIPE_SHADER_VERTEX);
1306    }
1307 
1308    /* FragmentShader Constants */
1309    if (ctx->dirty & SWR_NEW_FSCONSTANTS) {
1310       swr_update_constants(ctx, PIPE_SHADER_FRAGMENT);
1311    }
1312 
1313    /* Depth/stencil state */
1314    if (ctx->dirty & (SWR_NEW_DEPTH_STENCIL_ALPHA | SWR_NEW_FRAMEBUFFER)) {
1315       struct pipe_depth_state *depth = &(ctx->depth_stencil->depth);
1316       struct pipe_stencil_state *stencil = ctx->depth_stencil->stencil;
1317       SWR_DEPTH_STENCIL_STATE depthStencilState = {{0}};
1318       SWR_DEPTH_BOUNDS_STATE depthBoundsState = {0};
1319 
1320       /* XXX, incomplete.  Need to flesh out stencil & alpha test state
1321       struct pipe_stencil_state *front_stencil =
1322       ctx->depth_stencil.stencil[0];
1323       struct pipe_stencil_state *back_stencil = ctx->depth_stencil.stencil[1];
1324       struct pipe_alpha_state alpha;
1325       */
1326       if (stencil[0].enabled) {
1327          depthStencilState.stencilWriteEnable = 1;
1328          depthStencilState.stencilTestEnable = 1;
1329          depthStencilState.stencilTestFunc =
1330             swr_convert_depth_func(stencil[0].func);
1331 
1332          depthStencilState.stencilPassDepthPassOp =
1333             swr_convert_stencil_op(stencil[0].zpass_op);
1334          depthStencilState.stencilPassDepthFailOp =
1335             swr_convert_stencil_op(stencil[0].zfail_op);
1336          depthStencilState.stencilFailOp =
1337             swr_convert_stencil_op(stencil[0].fail_op);
1338          depthStencilState.stencilWriteMask = stencil[0].writemask;
1339          depthStencilState.stencilTestMask = stencil[0].valuemask;
1340          depthStencilState.stencilRefValue = ctx->stencil_ref.ref_value[0];
1341       }
1342       if (stencil[1].enabled) {
1343          depthStencilState.doubleSidedStencilTestEnable = 1;
1344 
1345          depthStencilState.backfaceStencilTestFunc =
1346             swr_convert_depth_func(stencil[1].func);
1347 
1348          depthStencilState.backfaceStencilPassDepthPassOp =
1349             swr_convert_stencil_op(stencil[1].zpass_op);
1350          depthStencilState.backfaceStencilPassDepthFailOp =
1351             swr_convert_stencil_op(stencil[1].zfail_op);
1352          depthStencilState.backfaceStencilFailOp =
1353             swr_convert_stencil_op(stencil[1].fail_op);
1354          depthStencilState.backfaceStencilWriteMask = stencil[1].writemask;
1355          depthStencilState.backfaceStencilTestMask = stencil[1].valuemask;
1356 
1357          depthStencilState.backfaceStencilRefValue =
1358             ctx->stencil_ref.ref_value[1];
1359       }
1360 
1361       depthStencilState.depthTestEnable = depth->enabled;
1362       depthStencilState.depthTestFunc = swr_convert_depth_func(depth->func);
1363       depthStencilState.depthWriteEnable = depth->writemask;
1364       SwrSetDepthStencilState(ctx->swrContext, &depthStencilState);
1365 
1366       depthBoundsState.depthBoundsTestEnable = depth->bounds_test;
1367       depthBoundsState.depthBoundsTestMinValue = depth->bounds_min;
1368       depthBoundsState.depthBoundsTestMaxValue = depth->bounds_max;
1369       SwrSetDepthBoundsState(ctx->swrContext, &depthBoundsState);
1370    }
1371 
1372    /* Blend State */
1373    if (ctx->dirty & (SWR_NEW_BLEND |
1374                      SWR_NEW_FRAMEBUFFER |
1375                      SWR_NEW_DEPTH_STENCIL_ALPHA)) {
1376       struct pipe_framebuffer_state *fb = &ctx->framebuffer;
1377 
1378       SWR_BLEND_STATE blendState;
1379       memcpy(&blendState, &ctx->blend->blendState, sizeof(blendState));
1380       blendState.constantColor[0] = ctx->blend_color.color[0];
1381       blendState.constantColor[1] = ctx->blend_color.color[1];
1382       blendState.constantColor[2] = ctx->blend_color.color[2];
1383       blendState.constantColor[3] = ctx->blend_color.color[3];
1384       blendState.alphaTestReference =
1385          *((uint32_t*)&ctx->depth_stencil->alpha.ref_value);
1386 
1387       // XXX MSAA
1388       blendState.sampleMask = 0;
1389       blendState.sampleCount = SWR_MULTISAMPLE_1X;
1390 
1391       /* If there are no color buffers bound, disable writes on RT0
1392        * and skip loop */
1393       if (fb->nr_cbufs == 0) {
1394          blendState.renderTarget[0].writeDisableRed = 1;
1395          blendState.renderTarget[0].writeDisableGreen = 1;
1396          blendState.renderTarget[0].writeDisableBlue = 1;
1397          blendState.renderTarget[0].writeDisableAlpha = 1;
1398          SwrSetBlendFunc(ctx->swrContext, 0, NULL);
1399       }
1400       else
1401          for (int target = 0;
1402                target < std::min(SWR_NUM_RENDERTARGETS,
1403                                  PIPE_MAX_COLOR_BUFS);
1404                target++) {
1405             if (!fb->cbufs[target])
1406                continue;
1407 
1408             struct swr_resource *colorBuffer =
1409                swr_resource(fb->cbufs[target]->texture);
1410 
1411             BLEND_COMPILE_STATE compileState;
1412             memset(&compileState, 0, sizeof(compileState));
1413             compileState.format = colorBuffer->swr.format;
1414             memcpy(&compileState.blendState,
1415                    &ctx->blend->compileState[target],
1416                    sizeof(compileState.blendState));
1417 
1418             const SWR_FORMAT_INFO& info = GetFormatInfo(compileState.format);
1419             if (compileState.blendState.logicOpEnable &&
1420                 ((info.type[0] == SWR_TYPE_FLOAT) || info.isSRGB)) {
1421                compileState.blendState.logicOpEnable = false;
1422             }
1423 
1424             if (info.type[0] == SWR_TYPE_SINT || info.type[0] == SWR_TYPE_UINT)
1425                compileState.blendState.blendEnable = false;
1426 
1427             if (compileState.blendState.blendEnable == false &&
1428                 compileState.blendState.logicOpEnable == false &&
1429                 ctx->depth_stencil->alpha.enabled == 0) {
1430                SwrSetBlendFunc(ctx->swrContext, target, NULL);
1431                continue;
1432             }
1433 
1434             compileState.desc.alphaTestEnable =
1435                ctx->depth_stencil->alpha.enabled;
1436             compileState.desc.independentAlphaBlendEnable =
1437                (compileState.blendState.sourceBlendFactor !=
1438                 compileState.blendState.sourceAlphaBlendFactor) ||
1439                (compileState.blendState.destBlendFactor !=
1440                 compileState.blendState.destAlphaBlendFactor) ||
1441                (compileState.blendState.colorBlendFunc !=
1442                 compileState.blendState.alphaBlendFunc);
1443             compileState.desc.alphaToCoverageEnable =
1444                ctx->blend->pipe.alpha_to_coverage;
1445             compileState.desc.sampleMaskEnable = 0; // XXX
1446             compileState.desc.numSamples = 1; // XXX
1447 
1448             compileState.alphaTestFunction =
1449                swr_convert_depth_func(ctx->depth_stencil->alpha.func);
1450             compileState.alphaTestFormat = ALPHA_TEST_FLOAT32; // xxx
1451 
1452             compileState.Canonicalize();
1453 
1454             PFN_BLEND_JIT_FUNC func = NULL;
1455             auto search = ctx->blendJIT->find(compileState);
1456             if (search != ctx->blendJIT->end()) {
1457                func = search->second;
1458             } else {
1459                HANDLE hJitMgr = screen->hJitMgr;
1460                func = JitCompileBlend(hJitMgr, compileState);
1461                debug_printf("BLEND shader %p\n", func);
1462                assert(func && "Error: BlendShader = NULL");
1463 
1464                ctx->blendJIT->insert(std::make_pair(compileState, func));
1465             }
1466             SwrSetBlendFunc(ctx->swrContext, target, func);
1467          }
1468 
1469       SwrSetBlendState(ctx->swrContext, &blendState);
1470    }
1471 
1472    if (ctx->dirty & SWR_NEW_STIPPLE) {
1473       /* XXX What to do with this one??? SWR doesn't stipple */
1474    }
1475 
1476    if (ctx->dirty & (SWR_NEW_VS | SWR_NEW_SO | SWR_NEW_RASTERIZER)) {
1477       ctx->vs->soState.rasterizerDisable =
1478          ctx->rasterizer->rasterizer_discard;
1479       SwrSetSoState(ctx->swrContext, &ctx->vs->soState);
1480 
1481       pipe_stream_output_info *stream_output = &ctx->vs->pipe.stream_output;
1482 
1483       for (uint32_t i = 0; i < ctx->num_so_targets; i++) {
1484          SWR_STREAMOUT_BUFFER buffer = {0};
1485          if (!ctx->so_targets[i])
1486             continue;
1487          buffer.enable = true;
1488          buffer.pBuffer =
1489             (uint32_t *)(swr_resource_data(ctx->so_targets[i]->buffer) +
1490                          ctx->so_targets[i]->buffer_offset);
1491          buffer.bufferSize = ctx->so_targets[i]->buffer_size >> 2;
1492          buffer.pitch = stream_output->stride[i];
1493          buffer.streamOffset = 0;
1494 
1495          SwrSetSoBuffers(ctx->swrContext, &buffer, i);
1496       }
1497    }
1498 
1499    if (ctx->dirty & SWR_NEW_CLIP) {
1500       // shader exporting clip distances overrides all user clip planes
1501       if (ctx->rasterizer->clip_plane_enable &&
1502           !ctx->vs->info.base.num_written_clipdistance)
1503       {
1504          swr_draw_context *pDC = &ctx->swrDC;
1505          memcpy(pDC->userClipPlanes,
1506                 ctx->clip.ucp,
1507                 sizeof(pDC->userClipPlanes));
1508       }
1509    }
1510 
1511    // set up backend state
1512    SWR_BACKEND_STATE backendState = {0};
1513    backendState.numAttributes =
1514       ctx->vs->info.base.num_outputs - 1 +
1515       (ctx->rasterizer->sprite_coord_enable ? 1 : 0);
1516    for (unsigned i = 0; i < backendState.numAttributes; i++)
1517       backendState.numComponents[i] = 4;
1518    backendState.constantInterpolationMask = ctx->fs->constantMask |
1519       (ctx->rasterizer->flatshade ? ctx->fs->flatConstantMask : 0);
1520    backendState.pointSpriteTexCoordMask = ctx->fs->pointSpriteMask;
1521 
1522    SwrSetBackendState(ctx->swrContext, &backendState);
1523 
1524    /* Ensure that any in-progress attachment change StoreTiles finish */
1525    if (swr_is_fence_pending(screen->flush_fence))
1526       swr_fence_finish(pipe->screen, NULL, screen->flush_fence, 0);
1527 
1528    /* Finally, update the in-use status of all resources involved in draw */
1529    swr_update_resource_status(pipe, p_draw_info);
1530 
1531    ctx->dirty = post_update_dirty_flags;
1532 }
1533 
1534 
1535 static struct pipe_stream_output_target *
swr_create_so_target(struct pipe_context * pipe,struct pipe_resource * buffer,unsigned buffer_offset,unsigned buffer_size)1536 swr_create_so_target(struct pipe_context *pipe,
1537                      struct pipe_resource *buffer,
1538                      unsigned buffer_offset,
1539                      unsigned buffer_size)
1540 {
1541    struct pipe_stream_output_target *target;
1542 
1543    target = CALLOC_STRUCT(pipe_stream_output_target);
1544    if (!target)
1545       return NULL;
1546 
1547    target->context = pipe;
1548    target->reference.count = 1;
1549    pipe_resource_reference(&target->buffer, buffer);
1550    target->buffer_offset = buffer_offset;
1551    target->buffer_size = buffer_size;
1552    return target;
1553 }
1554 
1555 static void
swr_destroy_so_target(struct pipe_context * pipe,struct pipe_stream_output_target * target)1556 swr_destroy_so_target(struct pipe_context *pipe,
1557                       struct pipe_stream_output_target *target)
1558 {
1559    pipe_resource_reference(&target->buffer, NULL);
1560    FREE(target);
1561 }
1562 
1563 static void
swr_set_so_targets(struct pipe_context * pipe,unsigned num_targets,struct pipe_stream_output_target ** targets,const unsigned * offsets)1564 swr_set_so_targets(struct pipe_context *pipe,
1565                    unsigned num_targets,
1566                    struct pipe_stream_output_target **targets,
1567                    const unsigned *offsets)
1568 {
1569    struct swr_context *swr = swr_context(pipe);
1570    uint32_t i;
1571 
1572    assert(num_targets <= MAX_SO_STREAMS);
1573 
1574    for (i = 0; i < num_targets; i++) {
1575       pipe_so_target_reference(
1576          (struct pipe_stream_output_target **)&swr->so_targets[i],
1577          targets[i]);
1578    }
1579 
1580    for (/* fall-through */; i < swr->num_so_targets; i++) {
1581       pipe_so_target_reference(
1582          (struct pipe_stream_output_target **)&swr->so_targets[i], NULL);
1583    }
1584 
1585    swr->num_so_targets = num_targets;
1586 
1587    swr->dirty |= SWR_NEW_SO;
1588 }
1589 
1590 
1591 void
swr_state_init(struct pipe_context * pipe)1592 swr_state_init(struct pipe_context *pipe)
1593 {
1594    pipe->create_blend_state = swr_create_blend_state;
1595    pipe->bind_blend_state = swr_bind_blend_state;
1596    pipe->delete_blend_state = swr_delete_blend_state;
1597 
1598    pipe->create_depth_stencil_alpha_state = swr_create_depth_stencil_state;
1599    pipe->bind_depth_stencil_alpha_state = swr_bind_depth_stencil_state;
1600    pipe->delete_depth_stencil_alpha_state = swr_delete_depth_stencil_state;
1601 
1602    pipe->create_rasterizer_state = swr_create_rasterizer_state;
1603    pipe->bind_rasterizer_state = swr_bind_rasterizer_state;
1604    pipe->delete_rasterizer_state = swr_delete_rasterizer_state;
1605 
1606    pipe->create_sampler_state = swr_create_sampler_state;
1607    pipe->bind_sampler_states = swr_bind_sampler_states;
1608    pipe->delete_sampler_state = swr_delete_sampler_state;
1609 
1610    pipe->create_sampler_view = swr_create_sampler_view;
1611    pipe->set_sampler_views = swr_set_sampler_views;
1612    pipe->sampler_view_destroy = swr_sampler_view_destroy;
1613 
1614    pipe->create_vs_state = swr_create_vs_state;
1615    pipe->bind_vs_state = swr_bind_vs_state;
1616    pipe->delete_vs_state = swr_delete_vs_state;
1617 
1618    pipe->create_fs_state = swr_create_fs_state;
1619    pipe->bind_fs_state = swr_bind_fs_state;
1620    pipe->delete_fs_state = swr_delete_fs_state;
1621 
1622    pipe->set_constant_buffer = swr_set_constant_buffer;
1623 
1624    pipe->create_vertex_elements_state = swr_create_vertex_elements_state;
1625    pipe->bind_vertex_elements_state = swr_bind_vertex_elements_state;
1626    pipe->delete_vertex_elements_state = swr_delete_vertex_elements_state;
1627 
1628    pipe->set_vertex_buffers = swr_set_vertex_buffers;
1629    pipe->set_index_buffer = swr_set_index_buffer;
1630 
1631    pipe->set_polygon_stipple = swr_set_polygon_stipple;
1632    pipe->set_clip_state = swr_set_clip_state;
1633    pipe->set_scissor_states = swr_set_scissor_states;
1634    pipe->set_viewport_states = swr_set_viewport_states;
1635 
1636    pipe->set_framebuffer_state = swr_set_framebuffer_state;
1637 
1638    pipe->set_blend_color = swr_set_blend_color;
1639    pipe->set_stencil_ref = swr_set_stencil_ref;
1640 
1641    pipe->set_sample_mask = swr_set_sample_mask;
1642 
1643    pipe->create_stream_output_target = swr_create_so_target;
1644    pipe->stream_output_target_destroy = swr_destroy_so_target;
1645    pipe->set_stream_output_targets = swr_set_so_targets;
1646 }
1647