• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2008 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * AA point stage:  AA points are converted to quads and rendered with a
30  * special fragment shader.  Another approach would be to use a texture
31  * map image of a point, but experiments indicate the quality isn't nearly
32  * as good as this approach.
33  *
34  * Note: this looks a lot like draw_aaline.c but there's actually little
35  * if any code that can be shared.
36  *
37  * Authors:  Brian Paul
38  */
39 
40 
41 #include "pipe/p_context.h"
42 #include "pipe/p_defines.h"
43 #include "pipe/p_shader_tokens.h"
44 
45 #include "tgsi/tgsi_transform.h"
46 #include "tgsi/tgsi_dump.h"
47 
48 #include "util/u_math.h"
49 #include "util/u_memory.h"
50 
51 #include "draw_context.h"
52 #include "draw_vs.h"
53 #include "draw_pipe.h"
54 
55 #include "nir.h"
56 #include "nir/nir_draw_helpers.h"
57 
58 /** Approx number of new tokens for instructions in aa_transform_inst() */
59 #define NUM_NEW_TOKENS 200
60 
61 
62 /*
63  * Enabling NORMALIZE might give _slightly_ better results.
64  * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
65  * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
66  * close enough and saves some costly instructions.
67  */
68 #define NORMALIZE 0
69 
70 
71 /**
72  * Subclass of pipe_shader_state to carry extra fragment shader info.
73  */
74 struct aapoint_fragment_shader
75 {
76    struct pipe_shader_state state;
77    void *driver_fs;   /**< the regular shader */
78    void *aapoint_fs;  /**< the aa point-augmented shader */
79    int generic_attrib; /**< The generic input attrib/texcoord we'll use */
80 };
81 
82 
83 /**
84  * Subclass of draw_stage
85  */
86 struct aapoint_stage
87 {
88    struct draw_stage stage;
89 
90    /** half of pipe_rasterizer_state::point_size */
91    float radius;
92 
93    /** vertex attrib slot containing point size */
94    int psize_slot;
95 
96    /** this is the vertex attrib slot for the new texcoords */
97    uint tex_slot;
98 
99    /** vertex attrib slot containing position */
100    uint pos_slot;
101 
102    /** Currently bound fragment shader */
103    struct aapoint_fragment_shader *fs;
104 
105    /*
106     * Driver interface/override functions
107     */
108    void * (*driver_create_fs_state)(struct pipe_context *,
109                                     const struct pipe_shader_state *);
110    void (*driver_bind_fs_state)(struct pipe_context *, void *);
111    void (*driver_delete_fs_state)(struct pipe_context *, void *);
112 };
113 
114 
115 
116 /**
117  * Subclass of tgsi_transform_context, used for transforming the
118  * user's fragment shader to add the special AA instructions.
119  */
120 struct aa_transform_context {
121    struct tgsi_transform_context base;
122    uint tempsUsed;  /**< bitmask */
123    int colorOutput; /**< which output is the primary color */
124    int maxInput, maxGeneric;  /**< max input index found */
125    int tmp0, colorTemp;  /**< temp registers */
126 };
127 
128 
129 /**
130  * TGSI declaration transform callback.
131  * Look for two free temp regs and available input reg for new texcoords.
132  */
133 static void
aa_transform_decl(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)134 aa_transform_decl(struct tgsi_transform_context *ctx,
135                   struct tgsi_full_declaration *decl)
136 {
137    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
138 
139    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
140        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
141        decl->Semantic.Index == 0) {
142       aactx->colorOutput = decl->Range.First;
143    }
144    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
145       if ((int) decl->Range.Last > aactx->maxInput)
146          aactx->maxInput = decl->Range.Last;
147       if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
148            (int) decl->Semantic.Index > aactx->maxGeneric) {
149          aactx->maxGeneric = decl->Semantic.Index;
150       }
151    }
152    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
153       uint i;
154       for (i = decl->Range.First;
155            i <= decl->Range.Last; i++) {
156          aactx->tempsUsed |= (1 << i);
157       }
158    }
159 
160    ctx->emit_declaration(ctx, decl);
161 }
162 
163 
164 /**
165  * TGSI transform callback.
166  * Insert new declarations and instructions before first instruction.
167  */
168 static void
aa_transform_prolog(struct tgsi_transform_context * ctx)169 aa_transform_prolog(struct tgsi_transform_context *ctx)
170 {
171    /* emit our new declarations before the first instruction */
172    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
173    struct tgsi_full_instruction newInst;
174    const int texInput = aactx->maxInput + 1;
175    int tmp0;
176    uint i;
177 
178    /* find two free temp regs */
179    for (i = 0; i < 32; i++) {
180       if ((aactx->tempsUsed & (1u << i)) == 0) {
181          /* found a free temp */
182          if (aactx->tmp0 < 0)
183             aactx->tmp0 = i;
184          else if (aactx->colorTemp < 0)
185             aactx->colorTemp = i;
186          else
187             break;
188       }
189    }
190 
191    assert(aactx->colorTemp != aactx->tmp0);
192 
193    tmp0 = aactx->tmp0;
194 
195    /* declare new generic input/texcoord */
196    tgsi_transform_input_decl(ctx, texInput,
197                              TGSI_SEMANTIC_GENERIC, aactx->maxGeneric + 1,
198                              TGSI_INTERPOLATE_LINEAR);
199 
200    /* declare new temp regs */
201    tgsi_transform_temp_decl(ctx, tmp0);
202    tgsi_transform_temp_decl(ctx, aactx->colorTemp);
203 
204    /*
205     * Emit code to compute fragment coverage, kill if outside point radius
206     *
207     * Temp reg0 usage:
208     *  t0.x = distance of fragment from center point
209     *  t0.y = boolean, is t0.x > 1.0, also misc temp usage
210     *  t0.z = temporary for computing 1/(1-k) value
211     *  t0.w = final coverage value
212     */
213 
214    /* MUL t0.xy, tex, tex;  # compute x^2, y^2 */
215    tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
216                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
217                            TGSI_FILE_INPUT, texInput,
218                            TGSI_FILE_INPUT, texInput, false);
219 
220    /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
221    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
222                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
223                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
224                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, false);
225 
226 #if NORMALIZE  /* OPTIONAL normalization of length */
227    /* RSQ t0.x, t0.x; */
228    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RSQ,
229                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
230                            TGSI_FILE_TEMPORARY, tmp0);
231 
232    /* RCP t0.x, t0.x; */
233    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RCP,
234                            TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
235                            TGSI_FILE_TEMPORARY, tmp0);
236 #endif
237 
238    /* SGT t0.y, t0.xxxx, tex.wwww;  # bool b = d > 1 (NOTE tex.w == 1) */
239    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SGT,
240                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
241                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
242                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, false);
243 
244    /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
245    tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
246                             TGSI_SWIZZLE_Y, TRUE);
247 
248    /* compute coverage factor = (1-d)/(1-k) */
249 
250    /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
251    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
252                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Z,
253                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
254                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, true);
255 
256    /* RCP t0.z, t0.z;  # t0.z = 1 / m */
257    newInst = tgsi_default_full_instruction();
258    newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
259    newInst.Instruction.NumDstRegs = 1;
260    newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
261    newInst.Dst[0].Register.Index = tmp0;
262    newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
263    newInst.Instruction.NumSrcRegs = 1;
264    newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
265    newInst.Src[0].Register.Index = tmp0;
266    newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
267    ctx->emit_instruction(ctx, &newInst);
268 
269    /* SUB t0.y, 1, t0.x;  # d = 1 - d */
270    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
271                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
272                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
273                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true);
274 
275    /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
276    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
277                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
278                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
279                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z, false);
280 
281    /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
282    tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SLE,
283                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
284                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
285                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, false);
286 
287    /* CMP t0.w, -t0.y, tex.w, t0.w;
288     *  # if -t0.y < 0 then
289     *       t0.w = 1
290     *    else
291     *       t0.w = t0.w
292     */
293    tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_CMP,
294                                TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
295                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, 1,
296                                TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
297                                TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
298 }
299 
300 
301 /**
302  * TGSI transform callback.
303  * Insert new instructions before the END instruction.
304  */
305 static void
aa_transform_epilog(struct tgsi_transform_context * ctx)306 aa_transform_epilog(struct tgsi_transform_context *ctx)
307 {
308    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
309 
310    /* add alpha modulation code at tail of program */
311 
312    /* MOV result.color.xyz, colorTemp; */
313    tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
314                            TGSI_FILE_OUTPUT, aactx->colorOutput,
315                            TGSI_WRITEMASK_XYZ,
316                            TGSI_FILE_TEMPORARY, aactx->colorTemp);
317 
318    /* MUL result.color.w, colorTemp, tmp0.w; */
319    tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
320                            TGSI_FILE_OUTPUT, aactx->colorOutput,
321                            TGSI_WRITEMASK_W,
322                            TGSI_FILE_TEMPORARY, aactx->colorTemp,
323                            TGSI_FILE_TEMPORARY, aactx->tmp0, false);
324 }
325 
326 
327 /**
328  * TGSI transform callback.
329  * Called per instruction.
330  * Replace writes to result.color w/ a temp reg.
331  */
332 static void
aa_transform_inst(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)333 aa_transform_inst(struct tgsi_transform_context *ctx,
334                   struct tgsi_full_instruction *inst)
335 {
336    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
337    unsigned i;
338 
339    /* Not an END instruction.
340     * Look for writes to result.color and replace with colorTemp reg.
341     */
342    for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
343       struct tgsi_full_dst_register *dst = &inst->Dst[i];
344       if (dst->Register.File == TGSI_FILE_OUTPUT &&
345           dst->Register.Index == aactx->colorOutput) {
346          dst->Register.File = TGSI_FILE_TEMPORARY;
347          dst->Register.Index = aactx->colorTemp;
348       }
349    }
350 
351    ctx->emit_instruction(ctx, inst);
352 }
353 
354 
355 /**
356  * Generate the frag shader we'll use for drawing AA points.
357  * This will be the user's shader plus some texture/modulate instructions.
358  */
359 static boolean
generate_aapoint_fs(struct aapoint_stage * aapoint)360 generate_aapoint_fs(struct aapoint_stage *aapoint)
361 {
362    const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
363    struct pipe_shader_state aapoint_fs;
364    struct aa_transform_context transform;
365    const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
366    struct pipe_context *pipe = aapoint->stage.draw->pipe;
367 
368    aapoint_fs = *orig_fs; /* copy to init */
369 
370    assert(aapoint_fs.type == PIPE_SHADER_IR_TGSI);
371 
372    memset(&transform, 0, sizeof(transform));
373    transform.colorOutput = -1;
374    transform.maxInput = -1;
375    transform.maxGeneric = -1;
376    transform.colorTemp = -1;
377    transform.tmp0 = -1;
378    transform.base.prolog = aa_transform_prolog;
379    transform.base.epilog = aa_transform_epilog;
380    transform.base.transform_instruction = aa_transform_inst;
381    transform.base.transform_declaration = aa_transform_decl;
382 
383    aapoint_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base);
384    if (!aapoint_fs.tokens)
385       return false;
386 
387 #if 0 /* DEBUG */
388    debug_printf("draw_aapoint, orig shader:\n");
389    tgsi_dump(orig_fs->tokens, 0);
390    debug_printf("draw_aapoint, new shader:\n");
391    tgsi_dump(aapoint_fs.tokens, 0);
392 #endif
393 
394    aapoint->fs->aapoint_fs
395       = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
396    if (aapoint->fs->aapoint_fs == NULL)
397       goto fail;
398 
399    aapoint->fs->generic_attrib = transform.maxGeneric + 1;
400    FREE((void *)aapoint_fs.tokens);
401    return TRUE;
402 
403 fail:
404    FREE((void *)aapoint_fs.tokens);
405    return FALSE;
406 }
407 
408 static boolean
generate_aapoint_fs_nir(struct aapoint_stage * aapoint)409 generate_aapoint_fs_nir(struct aapoint_stage *aapoint)
410 {
411    struct pipe_context *pipe = aapoint->stage.draw->pipe;
412    const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
413    struct pipe_shader_state aapoint_fs;
414 
415    aapoint_fs = *orig_fs; /* copy to init */
416    aapoint_fs.ir.nir = nir_shader_clone(NULL, orig_fs->ir.nir);
417    if (!aapoint_fs.ir.nir)
418       return FALSE;
419 
420    nir_lower_aapoint_fs(aapoint_fs.ir.nir, &aapoint->fs->generic_attrib);
421    aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
422    if (aapoint->fs->aapoint_fs == NULL)
423       goto fail;
424 
425    return TRUE;
426 
427 fail:
428    return FALSE;
429 }
430 
431 /**
432  * When we're about to draw our first AA point in a batch, this function is
433  * called to tell the driver to bind our modified fragment shader.
434  */
435 static boolean
bind_aapoint_fragment_shader(struct aapoint_stage * aapoint)436 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
437 {
438    struct draw_context *draw = aapoint->stage.draw;
439    struct pipe_context *pipe = draw->pipe;
440 
441    if (!aapoint->fs->aapoint_fs) {
442       if (aapoint->fs->state.type == PIPE_SHADER_IR_NIR) {
443          if (!generate_aapoint_fs_nir(aapoint))
444             return FALSE;
445       } else if (!generate_aapoint_fs(aapoint))
446          return FALSE;
447    }
448 
449    draw->suspend_flushing = TRUE;
450    aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
451    draw->suspend_flushing = FALSE;
452 
453    return TRUE;
454 }
455 
456 
457 
/** Cast a generic draw_stage pointer to the aapoint_stage type. */
static inline struct aapoint_stage *
aapoint_stage(struct draw_stage *stage)
{
   struct aapoint_stage *aapoint = (struct aapoint_stage *) stage;

   return aapoint;
}
463 
464 
465 
466 
467 /**
468  * Draw an AA point by drawing a quad.
469  */
470 static void
aapoint_point(struct draw_stage * stage,struct prim_header * header)471 aapoint_point(struct draw_stage *stage, struct prim_header *header)
472 {
473    const struct aapoint_stage *aapoint = aapoint_stage(stage);
474    struct prim_header tri;
475    struct vertex_header *v[4];
476    const uint tex_slot = aapoint->tex_slot;
477    const uint pos_slot = aapoint->pos_slot;
478    float radius, *pos, *tex;
479    uint i;
480    float k;
481 
482    if (aapoint->psize_slot >= 0) {
483       radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
484    }
485    else {
486       radius = aapoint->radius;
487    }
488 
489    /*
490     * Note: the texcoords (generic attrib, really) we use are special:
491     * The S and T components simply vary from -1 to +1.
492     * The R component is k, below.
493     * The Q component is 1.0 and will used as a handy constant in the
494     * fragment shader.
495     */
496 
497    /*
498     * k is the threshold distance from the point's center at which
499     * we begin alpha attenuation (the coverage value).
500     * Operating within a unit circle, we'll compute the fragment's
501     * distance 'd' from the center point using the texcoords.
502     * IF d > 1.0 THEN
503     *    KILL fragment
504     * ELSE IF d > k THEN
505     *    compute coverage in [0,1] proportional to d in [k, 1].
506     * ELSE
507     *    coverage = 1.0;  // full coverage
508     * ENDIF
509     *
510     * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
511     * avoid using IF/ELSE/ENDIF TGSI opcodes.
512     */
513 
514 #if !NORMALIZE
515    k = 1.0f / radius;
516    k = 1.0f - 2.0f * k + k * k;
517 #else
518    k = 1.0f - 1.0f / radius;
519 #endif
520 
521    /* allocate/dup new verts */
522    for (i = 0; i < 4; i++) {
523       v[i] = dup_vert(stage, header->v[0], i);
524    }
525 
526    /* new verts */
527    pos = v[0]->data[pos_slot];
528    pos[0] -= radius;
529    pos[1] -= radius;
530 
531    pos = v[1]->data[pos_slot];
532    pos[0] += radius;
533    pos[1] -= radius;
534 
535    pos = v[2]->data[pos_slot];
536    pos[0] += radius;
537    pos[1] += radius;
538 
539    pos = v[3]->data[pos_slot];
540    pos[0] -= radius;
541    pos[1] += radius;
542 
543    /* new texcoords */
544    tex = v[0]->data[tex_slot];
545    ASSIGN_4V(tex, -1, -1, k, 1);
546 
547    tex = v[1]->data[tex_slot];
548    ASSIGN_4V(tex,  1, -1, k, 1);
549 
550    tex = v[2]->data[tex_slot];
551    ASSIGN_4V(tex,  1,  1, k, 1);
552 
553    tex = v[3]->data[tex_slot];
554    ASSIGN_4V(tex, -1,  1, k, 1);
555 
556    /* emit 2 tris for the quad strip */
557    tri.v[0] = v[0];
558    tri.v[1] = v[1];
559    tri.v[2] = v[2];
560    stage->next->tri( stage->next, &tri );
561 
562    tri.v[0] = v[0];
563    tri.v[1] = v[2];
564    tri.v[2] = v[3];
565    stage->next->tri( stage->next, &tri );
566 }
567 
568 
569 static void
aapoint_first_point(struct draw_stage * stage,struct prim_header * header)570 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
571 {
572    auto struct aapoint_stage *aapoint = aapoint_stage(stage);
573    struct draw_context *draw = stage->draw;
574    struct pipe_context *pipe = draw->pipe;
575    const struct pipe_rasterizer_state *rast = draw->rasterizer;
576    void *r;
577 
578    assert(draw->rasterizer->point_smooth && !draw->rasterizer->multisample);
579 
580    if (draw->rasterizer->point_size <= 2.0)
581       aapoint->radius = 1.0;
582    else
583       aapoint->radius = 0.5f * draw->rasterizer->point_size;
584 
585    /*
586     * Bind (generate) our fragprog.
587     */
588    bind_aapoint_fragment_shader(aapoint);
589 
590    draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint);
591 
592    draw->suspend_flushing = TRUE;
593 
594    /* Disable triangle culling, stippling, unfilled mode etc. */
595    r = draw_get_rasterizer_no_cull(draw, rast);
596    pipe->bind_rasterizer_state(pipe, r);
597 
598    draw->suspend_flushing = FALSE;
599 
600    /* now really draw first point */
601    stage->point = aapoint_point;
602    stage->point(stage, header);
603 }
604 
605 
606 static void
aapoint_flush(struct draw_stage * stage,unsigned flags)607 aapoint_flush(struct draw_stage *stage, unsigned flags)
608 {
609    struct draw_context *draw = stage->draw;
610    struct aapoint_stage *aapoint = aapoint_stage(stage);
611    struct pipe_context *pipe = draw->pipe;
612 
613    stage->point = aapoint_first_point;
614    stage->next->flush( stage->next, flags );
615 
616    /* restore original frag shader */
617    draw->suspend_flushing = TRUE;
618    aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
619 
620    /* restore original rasterizer state */
621    if (draw->rast_handle) {
622       pipe->bind_rasterizer_state(pipe, draw->rast_handle);
623    }
624 
625    draw->suspend_flushing = FALSE;
626 
627    draw_remove_extra_vertex_attribs(draw);
628 }
629 
630 
631 static void
aapoint_reset_stipple_counter(struct draw_stage * stage)632 aapoint_reset_stipple_counter(struct draw_stage *stage)
633 {
634    stage->next->reset_stipple_counter( stage->next );
635 }
636 
637 
638 static void
aapoint_destroy(struct draw_stage * stage)639 aapoint_destroy(struct draw_stage *stage)
640 {
641    struct aapoint_stage* aapoint = aapoint_stage(stage);
642    struct pipe_context *pipe = stage->draw->pipe;
643 
644    draw_free_temp_verts( stage );
645 
646    /* restore the old entry points */
647    pipe->create_fs_state = aapoint->driver_create_fs_state;
648    pipe->bind_fs_state = aapoint->driver_bind_fs_state;
649    pipe->delete_fs_state = aapoint->driver_delete_fs_state;
650 
651    FREE( stage );
652 }
653 
654 void
draw_aapoint_prepare_outputs(struct draw_context * draw,struct draw_stage * stage)655 draw_aapoint_prepare_outputs(struct draw_context *draw,
656                              struct draw_stage *stage)
657 {
658    struct aapoint_stage *aapoint = aapoint_stage(stage);
659    const struct pipe_rasterizer_state *rast = draw->rasterizer;
660 
661    /* update vertex attrib info */
662    aapoint->pos_slot = draw_current_shader_position_output(draw);
663 
664    if (!rast->point_smooth || rast->multisample)
665       return;
666 
667    if (aapoint->fs && aapoint->fs->aapoint_fs) {
668       /* allocate the extra post-transformed vertex attribute */
669       aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
670                                                          TGSI_SEMANTIC_GENERIC,
671                                                          aapoint->fs->generic_attrib);
672       assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
673    } else
674       aapoint->tex_slot = -1;
675 
676    /* find psize slot in post-transform vertex */
677    aapoint->psize_slot = -1;
678    if (draw->rasterizer->point_size_per_vertex) {
679       const struct tgsi_shader_info *info = draw_get_shader_info(draw);
680       uint i;
681       /* find PSIZ vertex output */
682       for (i = 0; i < info->num_outputs; i++) {
683          if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
684             aapoint->psize_slot = i;
685             break;
686          }
687       }
688    }
689 }
690 
691 static struct aapoint_stage *
draw_aapoint_stage(struct draw_context * draw)692 draw_aapoint_stage(struct draw_context *draw)
693 {
694    struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
695    if (!aapoint)
696       goto fail;
697 
698    aapoint->stage.draw = draw;
699    aapoint->stage.name = "aapoint";
700    aapoint->stage.next = NULL;
701    aapoint->stage.point = aapoint_first_point;
702    aapoint->stage.line = draw_pipe_passthrough_line;
703    aapoint->stage.tri = draw_pipe_passthrough_tri;
704    aapoint->stage.flush = aapoint_flush;
705    aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
706    aapoint->stage.destroy = aapoint_destroy;
707 
708    if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
709       goto fail;
710 
711    return aapoint;
712 
713  fail:
714    if (aapoint)
715       aapoint->stage.destroy(&aapoint->stage);
716 
717    return NULL;
718 
719 }
720 
721 
722 static struct aapoint_stage *
aapoint_stage_from_pipe(struct pipe_context * pipe)723 aapoint_stage_from_pipe(struct pipe_context *pipe)
724 {
725    struct draw_context *draw = (struct draw_context *) pipe->draw;
726    return aapoint_stage(draw->pipeline.aapoint);
727 }
728 
729 
730 /**
731  * This function overrides the driver's create_fs_state() function and
732  * will typically be called by the gallium frontend.
733  */
734 static void *
aapoint_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * fs)735 aapoint_create_fs_state(struct pipe_context *pipe,
736                        const struct pipe_shader_state *fs)
737 {
738    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
739    struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
740    if (!aafs)
741       return NULL;
742 
743    aafs->state.type = fs->type;
744    if (fs->type == PIPE_SHADER_IR_TGSI)
745       aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
746    else
747       aafs->state.ir.nir = nir_shader_clone(NULL, fs->ir.nir);
748    /* pass-through */
749    aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
750 
751    return aafs;
752 }
753 
754 
755 static void
aapoint_bind_fs_state(struct pipe_context * pipe,void * fs)756 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
757 {
758    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
759    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
760    /* save current */
761    aapoint->fs = aafs;
762    /* pass-through */
763    aapoint->driver_bind_fs_state(pipe,
764                                  (aafs ? aafs->driver_fs : NULL));
765 }
766 
767 
768 static void
aapoint_delete_fs_state(struct pipe_context * pipe,void * fs)769 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
770 {
771    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
772    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
773 
774    /* pass-through */
775    aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
776 
777    if (aafs->aapoint_fs)
778       aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
779 
780    if (aafs->state.type == PIPE_SHADER_IR_TGSI)
781       FREE((void*)aafs->state.tokens);
782    else
783       ralloc_free(aafs->state.ir.nir);
784 
785    FREE(aafs);
786 }
787 
788 
789 /**
790  * Called by drivers that want to install this AA point prim stage
791  * into the draw module's pipeline.  This will not be used if the
792  * hardware has native support for AA points.
793  */
794 boolean
draw_install_aapoint_stage(struct draw_context * draw,struct pipe_context * pipe)795 draw_install_aapoint_stage(struct draw_context *draw,
796                            struct pipe_context *pipe)
797 {
798    struct aapoint_stage *aapoint;
799 
800    pipe->draw = (void *) draw;
801 
802    /*
803     * Create / install AA point drawing / prim stage
804     */
805    aapoint = draw_aapoint_stage( draw );
806    if (!aapoint)
807       return FALSE;
808 
809    /* save original driver functions */
810    aapoint->driver_create_fs_state = pipe->create_fs_state;
811    aapoint->driver_bind_fs_state = pipe->bind_fs_state;
812    aapoint->driver_delete_fs_state = pipe->delete_fs_state;
813 
814    /* override the driver's functions */
815    pipe->create_fs_state = aapoint_create_fs_state;
816    pipe->bind_fs_state = aapoint_bind_fs_state;
817    pipe->delete_fs_state = aapoint_delete_fs_state;
818 
819    draw->pipeline.aapoint = &aapoint->stage;
820 
821    return TRUE;
822 }
823