/**************************************************************************
 *
 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
27 
/**
 * AA point stage:  AA points are converted to quads and rendered with a
 * special fragment shader.  Another approach would be to use a texture
 * map image of a point, but experiments indicate the quality isn't nearly
 * as good as this approach.
 *
 * Note: this looks a lot like draw_aaline.c but there's actually little
 * if any code that can be shared.
 *
 * Authors:  Brian Paul
 */
39 
40 
41 #include "pipe/p_context.h"
42 #include "pipe/p_defines.h"
43 #include "pipe/p_shader_tokens.h"
44 
45 #include "tgsi/tgsi_transform.h"
46 #include "tgsi/tgsi_dump.h"
47 
48 #include "util/u_math.h"
49 #include "util/u_memory.h"
50 
51 #include "draw_context.h"
52 #include "draw_vs.h"
53 #include "draw_pipe.h"
54 
55 
/**
 * Approximate number of new tokens for the instructions emitted by
 * aa_transform_inst().
 */
#define NUM_NEW_TOKENS 200


/*
 * Enabling NORMALIZE might give _slightly_ better results.
 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
 */
#define NORMALIZE 0
67 
68 
69 /**
70  * Subclass of pipe_shader_state to carry extra fragment shader info.
71  */
72 struct aapoint_fragment_shader
73 {
74    struct pipe_shader_state state;
75    void *driver_fs;   /**< the regular shader */
76    void *aapoint_fs;  /**< the aa point-augmented shader */
77    int generic_attrib; /**< The generic input attrib/texcoord we'll use */
78 };
79 
80 
81 /**
82  * Subclass of draw_stage
83  */
84 struct aapoint_stage
85 {
86    struct draw_stage stage;
87 
88    /** half of pipe_rasterizer_state::point_size */
89    float radius;
90 
91    /** vertex attrib slot containing point size */
92    int psize_slot;
93 
94    /** this is the vertex attrib slot for the new texcoords */
95    uint tex_slot;
96 
97    /** vertex attrib slot containing position */
98    uint pos_slot;
99 
100    /** Currently bound fragment shader */
101    struct aapoint_fragment_shader *fs;
102 
103    /*
104     * Driver interface/override functions
105     */
106    void * (*driver_create_fs_state)(struct pipe_context *,
107                                     const struct pipe_shader_state *);
108    void (*driver_bind_fs_state)(struct pipe_context *, void *);
109    void (*driver_delete_fs_state)(struct pipe_context *, void *);
110 };
111 
112 
113 
114 /**
115  * Subclass of tgsi_transform_context, used for transforming the
116  * user's fragment shader to add the special AA instructions.
117  */
118 struct aa_transform_context {
119    struct tgsi_transform_context base;
120    uint tempsUsed;  /**< bitmask */
121    int colorOutput; /**< which output is the primary color */
122    int maxInput, maxGeneric;  /**< max input index found */
123    int tmp0, colorTemp;  /**< temp registers */
124    boolean firstInstruction;
125 };
126 
127 
128 /**
129  * TGSI declaration transform callback.
130  * Look for two free temp regs and available input reg for new texcoords.
131  */
132 static void
aa_transform_decl(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)133 aa_transform_decl(struct tgsi_transform_context *ctx,
134                   struct tgsi_full_declaration *decl)
135 {
136    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
137 
138    if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
139        decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
140        decl->Semantic.Index == 0) {
141       aactx->colorOutput = decl->Range.First;
142    }
143    else if (decl->Declaration.File == TGSI_FILE_INPUT) {
144       if ((int) decl->Range.Last > aactx->maxInput)
145          aactx->maxInput = decl->Range.Last;
146       if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
147            (int) decl->Semantic.Index > aactx->maxGeneric) {
148          aactx->maxGeneric = decl->Semantic.Index;
149       }
150    }
151    else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
152       uint i;
153       for (i = decl->Range.First;
154            i <= decl->Range.Last; i++) {
155          aactx->tempsUsed |= (1 << i);
156       }
157    }
158 
159    ctx->emit_declaration(ctx, decl);
160 }
161 
162 
163 /**
164  * TGSI instruction transform callback.
165  * Replace writes to result.color w/ a temp reg.
166  * Upon END instruction, insert texture sampling code for antialiasing.
167  */
168 static void
aa_transform_inst(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)169 aa_transform_inst(struct tgsi_transform_context *ctx,
170                   struct tgsi_full_instruction *inst)
171 {
172    struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
173    struct tgsi_full_instruction newInst;
174 
175    if (aactx->firstInstruction) {
176       /* emit our new declarations before the first instruction */
177 
178       struct tgsi_full_declaration decl;
179       const int texInput = aactx->maxInput + 1;
180       int tmp0;
181       uint i;
182 
183       /* find two free temp regs */
184       for (i = 0; i < 32; i++) {
185          if ((aactx->tempsUsed & (1 << i)) == 0) {
186             /* found a free temp */
187             if (aactx->tmp0 < 0)
188                aactx->tmp0 = i;
189             else if (aactx->colorTemp < 0)
190                aactx->colorTemp = i;
191             else
192                break;
193          }
194       }
195 
196       assert(aactx->colorTemp != aactx->tmp0);
197 
198       tmp0 = aactx->tmp0;
199 
200       /* declare new generic input/texcoord */
201       decl = tgsi_default_full_declaration();
202       decl.Declaration.File = TGSI_FILE_INPUT;
203       /* XXX this could be linear... */
204       decl.Declaration.Interpolate = 1;
205       decl.Declaration.Semantic = 1;
206       decl.Semantic.Name = TGSI_SEMANTIC_GENERIC;
207       decl.Semantic.Index = aactx->maxGeneric + 1;
208       decl.Range.First =
209       decl.Range.Last = texInput;
210       decl.Interp.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
211       ctx->emit_declaration(ctx, &decl);
212 
213       /* declare new temp regs */
214       decl = tgsi_default_full_declaration();
215       decl.Declaration.File = TGSI_FILE_TEMPORARY;
216       decl.Range.First =
217       decl.Range.Last = tmp0;
218       ctx->emit_declaration(ctx, &decl);
219 
220       decl = tgsi_default_full_declaration();
221       decl.Declaration.File = TGSI_FILE_TEMPORARY;
222       decl.Range.First =
223       decl.Range.Last = aactx->colorTemp;
224       ctx->emit_declaration(ctx, &decl);
225 
226       aactx->firstInstruction = FALSE;
227 
228 
229       /*
230        * Emit code to compute fragment coverage, kill if outside point radius
231        *
232        * Temp reg0 usage:
233        *  t0.x = distance of fragment from center point
234        *  t0.y = boolean, is t0.x > 1.0, also misc temp usage
235        *  t0.z = temporary for computing 1/(1-k) value
236        *  t0.w = final coverage value
237        */
238 
239       /* MUL t0.xy, tex, tex;  # compute x^2, y^2 */
240       newInst = tgsi_default_full_instruction();
241       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
242       newInst.Instruction.NumDstRegs = 1;
243       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
244       newInst.Dst[0].Register.Index = tmp0;
245       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XY;
246       newInst.Instruction.NumSrcRegs = 2;
247       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
248       newInst.Src[0].Register.Index = texInput;
249       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
250       newInst.Src[1].Register.Index = texInput;
251       ctx->emit_instruction(ctx, &newInst);
252 
253       /* ADD t0.x, t0.x, t0.y;  # x^2 + y^2 */
254       newInst = tgsi_default_full_instruction();
255       newInst.Instruction.Opcode = TGSI_OPCODE_ADD;
256       newInst.Instruction.NumDstRegs = 1;
257       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
258       newInst.Dst[0].Register.Index = tmp0;
259       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
260       newInst.Instruction.NumSrcRegs = 2;
261       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
262       newInst.Src[0].Register.Index = tmp0;
263       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
264       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
265       newInst.Src[1].Register.Index = tmp0;
266       newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_Y;
267       ctx->emit_instruction(ctx, &newInst);
268 
269 #if NORMALIZE  /* OPTIONAL normalization of length */
270       /* RSQ t0.x, t0.x; */
271       newInst = tgsi_default_full_instruction();
272       newInst.Instruction.Opcode = TGSI_OPCODE_RSQ;
273       newInst.Instruction.NumDstRegs = 1;
274       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
275       newInst.Dst[0].Register.Index = tmp0;
276       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
277       newInst.Instruction.NumSrcRegs = 1;
278       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
279       newInst.Src[0].Register.Index = tmp0;
280       ctx->emit_instruction(ctx, &newInst);
281 
282       /* RCP t0.x, t0.x; */
283       newInst = tgsi_default_full_instruction();
284       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
285       newInst.Instruction.NumDstRegs = 1;
286       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
287       newInst.Dst[0].Register.Index = tmp0;
288       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_X;
289       newInst.Instruction.NumSrcRegs = 1;
290       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
291       newInst.Src[0].Register.Index = tmp0;
292       ctx->emit_instruction(ctx, &newInst);
293 #endif
294 
295       /* SGT t0.y, t0.xxxx, tex.wwww;  # bool b = d > 1 (NOTE tex.w == 1) */
296       newInst = tgsi_default_full_instruction();
297       newInst.Instruction.Opcode = TGSI_OPCODE_SGT;
298       newInst.Instruction.NumDstRegs = 1;
299       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
300       newInst.Dst[0].Register.Index = tmp0;
301       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
302       newInst.Instruction.NumSrcRegs = 2;
303       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
304       newInst.Src[0].Register.Index = tmp0;
305       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
306       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
307       newInst.Src[1].Register.Index = texInput;
308       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
309       ctx->emit_instruction(ctx, &newInst);
310 
311       /* KIL -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
312       newInst = tgsi_default_full_instruction();
313       newInst.Instruction.Opcode = TGSI_OPCODE_KIL;
314       newInst.Instruction.NumDstRegs = 0;
315       newInst.Instruction.NumSrcRegs = 1;
316       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
317       newInst.Src[0].Register.Index = tmp0;
318       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
319       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
320       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
321       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
322       newInst.Src[0].Register.Negate = 1;
323       ctx->emit_instruction(ctx, &newInst);
324 
325 
326       /* compute coverage factor = (1-d)/(1-k) */
327 
328       /* SUB t0.z, tex.w, tex.z;  # m = 1 - k */
329       newInst = tgsi_default_full_instruction();
330       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
331       newInst.Instruction.NumDstRegs = 1;
332       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
333       newInst.Dst[0].Register.Index = tmp0;
334       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
335       newInst.Instruction.NumSrcRegs = 2;
336       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
337       newInst.Src[0].Register.Index = texInput;
338       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_W;
339       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
340       newInst.Src[1].Register.Index = texInput;
341       newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
342       ctx->emit_instruction(ctx, &newInst);
343 
344       /* RCP t0.z, t0.z;  # t0.z = 1 / m */
345       newInst = tgsi_default_full_instruction();
346       newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
347       newInst.Instruction.NumDstRegs = 1;
348       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
349       newInst.Dst[0].Register.Index = tmp0;
350       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
351       newInst.Instruction.NumSrcRegs = 1;
352       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
353       newInst.Src[0].Register.Index = tmp0;
354       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
355       ctx->emit_instruction(ctx, &newInst);
356 
357       /* SUB t0.y, 1, t0.x;  # d = 1 - d */
358       newInst = tgsi_default_full_instruction();
359       newInst.Instruction.Opcode = TGSI_OPCODE_SUB;
360       newInst.Instruction.NumDstRegs = 1;
361       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
362       newInst.Dst[0].Register.Index = tmp0;
363       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
364       newInst.Instruction.NumSrcRegs = 2;
365       newInst.Src[0].Register.File = TGSI_FILE_INPUT;
366       newInst.Src[0].Register.Index = texInput;
367       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_W;
368       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
369       newInst.Src[1].Register.Index = tmp0;
370       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X;
371       ctx->emit_instruction(ctx, &newInst);
372 
373       /* MUL t0.w, t0.y, t0.z;   # coverage = d * m */
374       newInst = tgsi_default_full_instruction();
375       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
376       newInst.Instruction.NumDstRegs = 1;
377       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
378       newInst.Dst[0].Register.Index = tmp0;
379       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
380       newInst.Instruction.NumSrcRegs = 2;
381       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
382       newInst.Src[0].Register.Index = tmp0;
383       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
384       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
385       newInst.Src[1].Register.Index = tmp0;
386       newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_Z;
387       ctx->emit_instruction(ctx, &newInst);
388 
389       /* SLE t0.y, t0.x, tex.z;  # bool b = distance <= k */
390       newInst = tgsi_default_full_instruction();
391       newInst.Instruction.Opcode = TGSI_OPCODE_SLE;
392       newInst.Instruction.NumDstRegs = 1;
393       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
394       newInst.Dst[0].Register.Index = tmp0;
395       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Y;
396       newInst.Instruction.NumSrcRegs = 2;
397       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
398       newInst.Src[0].Register.Index = tmp0;
399       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
400       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
401       newInst.Src[1].Register.Index = texInput;
402       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Z;
403       ctx->emit_instruction(ctx, &newInst);
404 
405       /* CMP t0.w, -t0.y, tex.w, t0.w;
406        *  # if -t0.y < 0 then
407        *       t0.w = 1
408        *    else
409        *       t0.w = t0.w
410        */
411       newInst = tgsi_default_full_instruction();
412       newInst.Instruction.Opcode = TGSI_OPCODE_CMP;
413       newInst.Instruction.NumDstRegs = 1;
414       newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
415       newInst.Dst[0].Register.Index = tmp0;
416       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
417       newInst.Instruction.NumSrcRegs = 3;
418       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
419       newInst.Src[0].Register.Index = tmp0;
420       newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Y;
421       newInst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_Y;
422       newInst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_Y;
423       newInst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_Y;
424       newInst.Src[0].Register.Negate = 1;
425       newInst.Src[1].Register.File = TGSI_FILE_INPUT;
426       newInst.Src[1].Register.Index = texInput;
427       newInst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_W;
428       newInst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_W;
429       newInst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_W;
430       newInst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
431       newInst.Src[2].Register.File = TGSI_FILE_TEMPORARY;
432       newInst.Src[2].Register.Index = tmp0;
433       newInst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_W;
434       newInst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_W;
435       newInst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_W;
436       newInst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
437       ctx->emit_instruction(ctx, &newInst);
438 
439    }
440 
441    if (inst->Instruction.Opcode == TGSI_OPCODE_END) {
442       /* add alpha modulation code at tail of program */
443 
444       /* MOV result.color.xyz, colorTemp; */
445       newInst = tgsi_default_full_instruction();
446       newInst.Instruction.Opcode = TGSI_OPCODE_MOV;
447       newInst.Instruction.NumDstRegs = 1;
448       newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
449       newInst.Dst[0].Register.Index = aactx->colorOutput;
450       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZ;
451       newInst.Instruction.NumSrcRegs = 1;
452       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
453       newInst.Src[0].Register.Index = aactx->colorTemp;
454       ctx->emit_instruction(ctx, &newInst);
455 
456       /* MUL result.color.w, colorTemp, tmp0.w; */
457       newInst = tgsi_default_full_instruction();
458       newInst.Instruction.Opcode = TGSI_OPCODE_MUL;
459       newInst.Instruction.NumDstRegs = 1;
460       newInst.Dst[0].Register.File = TGSI_FILE_OUTPUT;
461       newInst.Dst[0].Register.Index = aactx->colorOutput;
462       newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_W;
463       newInst.Instruction.NumSrcRegs = 2;
464       newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
465       newInst.Src[0].Register.Index = aactx->colorTemp;
466       newInst.Src[1].Register.File = TGSI_FILE_TEMPORARY;
467       newInst.Src[1].Register.Index = aactx->tmp0;
468       ctx->emit_instruction(ctx, &newInst);
469    }
470    else {
471       /* Not an END instruction.
472        * Look for writes to result.color and replace with colorTemp reg.
473        */
474       uint i;
475 
476       for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
477          struct tgsi_full_dst_register *dst = &inst->Dst[i];
478          if (dst->Register.File == TGSI_FILE_OUTPUT &&
479              dst->Register.Index == aactx->colorOutput) {
480             dst->Register.File = TGSI_FILE_TEMPORARY;
481             dst->Register.Index = aactx->colorTemp;
482          }
483       }
484    }
485 
486    ctx->emit_instruction(ctx, inst);
487 }
488 
489 
490 /**
491  * Generate the frag shader we'll use for drawing AA points.
492  * This will be the user's shader plus some texture/modulate instructions.
493  */
494 static boolean
generate_aapoint_fs(struct aapoint_stage * aapoint)495 generate_aapoint_fs(struct aapoint_stage *aapoint)
496 {
497    const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
498    struct pipe_shader_state aapoint_fs;
499    struct aa_transform_context transform;
500    const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
501    struct pipe_context *pipe = aapoint->stage.draw->pipe;
502 
503    aapoint_fs = *orig_fs; /* copy to init */
504    aapoint_fs.tokens = tgsi_alloc_tokens(newLen);
505    if (aapoint_fs.tokens == NULL)
506       return FALSE;
507 
508    memset(&transform, 0, sizeof(transform));
509    transform.colorOutput = -1;
510    transform.maxInput = -1;
511    transform.maxGeneric = -1;
512    transform.colorTemp = -1;
513    transform.tmp0 = -1;
514    transform.firstInstruction = TRUE;
515    transform.base.transform_instruction = aa_transform_inst;
516    transform.base.transform_declaration = aa_transform_decl;
517 
518    tgsi_transform_shader(orig_fs->tokens,
519                          (struct tgsi_token *) aapoint_fs.tokens,
520                          newLen, &transform.base);
521 
522 #if 0 /* DEBUG */
523    debug_printf("draw_aapoint, orig shader:\n");
524    tgsi_dump(orig_fs->tokens, 0);
525    debug_printf("draw_aapoint, new shader:\n");
526    tgsi_dump(aapoint_fs.tokens, 0);
527 #endif
528 
529    aapoint->fs->aapoint_fs
530       = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
531    if (aapoint->fs->aapoint_fs == NULL)
532       goto fail;
533 
534    aapoint->fs->generic_attrib = transform.maxGeneric + 1;
535    FREE((void *)aapoint_fs.tokens);
536    return TRUE;
537 
538 fail:
539    FREE((void *)aapoint_fs.tokens);
540    return FALSE;
541 }
542 
543 
544 /**
545  * When we're about to draw our first AA point in a batch, this function is
546  * called to tell the driver to bind our modified fragment shader.
547  */
548 static boolean
bind_aapoint_fragment_shader(struct aapoint_stage * aapoint)549 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
550 {
551    struct draw_context *draw = aapoint->stage.draw;
552    struct pipe_context *pipe = draw->pipe;
553 
554    if (!aapoint->fs->aapoint_fs &&
555        !generate_aapoint_fs(aapoint))
556       return FALSE;
557 
558    draw->suspend_flushing = TRUE;
559    aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
560    draw->suspend_flushing = FALSE;
561 
562    return TRUE;
563 }
564 
565 
566 
567 static INLINE struct aapoint_stage *
aapoint_stage(struct draw_stage * stage)568 aapoint_stage( struct draw_stage *stage )
569 {
570    return (struct aapoint_stage *) stage;
571 }
572 
573 
574 
575 
576 /**
577  * Draw an AA point by drawing a quad.
578  */
579 static void
aapoint_point(struct draw_stage * stage,struct prim_header * header)580 aapoint_point(struct draw_stage *stage, struct prim_header *header)
581 {
582    const struct aapoint_stage *aapoint = aapoint_stage(stage);
583    struct prim_header tri;
584    struct vertex_header *v[4];
585    const uint tex_slot = aapoint->tex_slot;
586    const uint pos_slot = aapoint->pos_slot;
587    float radius, *pos, *tex;
588    uint i;
589    float k;
590 
591    if (aapoint->psize_slot >= 0) {
592       radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
593    }
594    else {
595       radius = aapoint->radius;
596    }
597 
598    /*
599     * Note: the texcoords (generic attrib, really) we use are special:
600     * The S and T components simply vary from -1 to +1.
601     * The R component is k, below.
602     * The Q component is 1.0 and will used as a handy constant in the
603     * fragment shader.
604     */
605 
606    /*
607     * k is the threshold distance from the point's center at which
608     * we begin alpha attenuation (the coverage value).
609     * Operating within a unit circle, we'll compute the fragment's
610     * distance 'd' from the center point using the texcoords.
611     * IF d > 1.0 THEN
612     *    KILL fragment
613     * ELSE IF d > k THEN
614     *    compute coverage in [0,1] proportional to d in [k, 1].
615     * ELSE
616     *    coverage = 1.0;  // full coverage
617     * ENDIF
618     *
619     * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
620     * avoid using IF/ELSE/ENDIF TGSI opcodes.
621     */
622 
623 #if !NORMALIZE
624    k = 1.0f / radius;
625    k = 1.0f - 2.0f * k + k * k;
626 #else
627    k = 1.0f - 1.0f / radius;
628 #endif
629 
630    /* allocate/dup new verts */
631    for (i = 0; i < 4; i++) {
632       v[i] = dup_vert(stage, header->v[0], i);
633    }
634 
635    /* new verts */
636    pos = v[0]->data[pos_slot];
637    pos[0] -= radius;
638    pos[1] -= radius;
639 
640    pos = v[1]->data[pos_slot];
641    pos[0] += radius;
642    pos[1] -= radius;
643 
644    pos = v[2]->data[pos_slot];
645    pos[0] += radius;
646    pos[1] += radius;
647 
648    pos = v[3]->data[pos_slot];
649    pos[0] -= radius;
650    pos[1] += radius;
651 
652    /* new texcoords */
653    tex = v[0]->data[tex_slot];
654    ASSIGN_4V(tex, -1, -1, k, 1);
655 
656    tex = v[1]->data[tex_slot];
657    ASSIGN_4V(tex,  1, -1, k, 1);
658 
659    tex = v[2]->data[tex_slot];
660    ASSIGN_4V(tex,  1,  1, k, 1);
661 
662    tex = v[3]->data[tex_slot];
663    ASSIGN_4V(tex, -1,  1, k, 1);
664 
665    /* emit 2 tris for the quad strip */
666    tri.v[0] = v[0];
667    tri.v[1] = v[1];
668    tri.v[2] = v[2];
669    stage->next->tri( stage->next, &tri );
670 
671    tri.v[0] = v[0];
672    tri.v[1] = v[2];
673    tri.v[2] = v[3];
674    stage->next->tri( stage->next, &tri );
675 }
676 
677 
678 static void
aapoint_first_point(struct draw_stage * stage,struct prim_header * header)679 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
680 {
681    auto struct aapoint_stage *aapoint = aapoint_stage(stage);
682    struct draw_context *draw = stage->draw;
683    struct pipe_context *pipe = draw->pipe;
684    const struct pipe_rasterizer_state *rast = draw->rasterizer;
685    void *r;
686 
687    assert(draw->rasterizer->point_smooth);
688 
689    if (draw->rasterizer->point_size <= 2.0)
690       aapoint->radius = 1.0;
691    else
692       aapoint->radius = 0.5f * draw->rasterizer->point_size;
693 
694    /*
695     * Bind (generate) our fragprog.
696     */
697    bind_aapoint_fragment_shader(aapoint);
698 
699    /* update vertex attrib info */
700    aapoint->pos_slot = draw_current_shader_position_output(draw);
701 
702    /* allocate the extra post-transformed vertex attribute */
703    aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
704                                                       TGSI_SEMANTIC_GENERIC,
705                                                       aapoint->fs->generic_attrib);
706    assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
707 
708    /* find psize slot in post-transform vertex */
709    aapoint->psize_slot = -1;
710    if (draw->rasterizer->point_size_per_vertex) {
711       const struct tgsi_shader_info *info = draw_get_shader_info(draw);
712       uint i;
713       /* find PSIZ vertex output */
714       for (i = 0; i < info->num_outputs; i++) {
715          if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
716             aapoint->psize_slot = i;
717             break;
718          }
719       }
720    }
721 
722    draw->suspend_flushing = TRUE;
723 
724    /* Disable triangle culling, stippling, unfilled mode etc. */
725    r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
726    pipe->bind_rasterizer_state(pipe, r);
727 
728    draw->suspend_flushing = FALSE;
729 
730    /* now really draw first point */
731    stage->point = aapoint_point;
732    stage->point(stage, header);
733 }
734 
735 
736 static void
aapoint_flush(struct draw_stage * stage,unsigned flags)737 aapoint_flush(struct draw_stage *stage, unsigned flags)
738 {
739    struct draw_context *draw = stage->draw;
740    struct aapoint_stage *aapoint = aapoint_stage(stage);
741    struct pipe_context *pipe = draw->pipe;
742 
743    stage->point = aapoint_first_point;
744    stage->next->flush( stage->next, flags );
745 
746    /* restore original frag shader */
747    draw->suspend_flushing = TRUE;
748    aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
749 
750    /* restore original rasterizer state */
751    if (draw->rast_handle) {
752       pipe->bind_rasterizer_state(pipe, draw->rast_handle);
753    }
754 
755    draw->suspend_flushing = FALSE;
756 
757    draw_remove_extra_vertex_attribs(draw);
758 }
759 
760 
761 static void
aapoint_reset_stipple_counter(struct draw_stage * stage)762 aapoint_reset_stipple_counter(struct draw_stage *stage)
763 {
764    stage->next->reset_stipple_counter( stage->next );
765 }
766 
767 
768 static void
aapoint_destroy(struct draw_stage * stage)769 aapoint_destroy(struct draw_stage *stage)
770 {
771    struct aapoint_stage* aapoint = aapoint_stage(stage);
772    struct pipe_context *pipe = stage->draw->pipe;
773 
774    draw_free_temp_verts( stage );
775 
776    /* restore the old entry points */
777    pipe->create_fs_state = aapoint->driver_create_fs_state;
778    pipe->bind_fs_state = aapoint->driver_bind_fs_state;
779    pipe->delete_fs_state = aapoint->driver_delete_fs_state;
780 
781    FREE( stage );
782 }
783 
784 
785 static struct aapoint_stage *
draw_aapoint_stage(struct draw_context * draw)786 draw_aapoint_stage(struct draw_context *draw)
787 {
788    struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
789    if (aapoint == NULL)
790       goto fail;
791 
792    aapoint->stage.draw = draw;
793    aapoint->stage.name = "aapoint";
794    aapoint->stage.next = NULL;
795    aapoint->stage.point = aapoint_first_point;
796    aapoint->stage.line = draw_pipe_passthrough_line;
797    aapoint->stage.tri = draw_pipe_passthrough_tri;
798    aapoint->stage.flush = aapoint_flush;
799    aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
800    aapoint->stage.destroy = aapoint_destroy;
801 
802    if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
803       goto fail;
804 
805    return aapoint;
806 
807  fail:
808    if (aapoint)
809       aapoint->stage.destroy(&aapoint->stage);
810 
811    return NULL;
812 
813 }
814 
815 
816 static struct aapoint_stage *
aapoint_stage_from_pipe(struct pipe_context * pipe)817 aapoint_stage_from_pipe(struct pipe_context *pipe)
818 {
819    struct draw_context *draw = (struct draw_context *) pipe->draw;
820    return aapoint_stage(draw->pipeline.aapoint);
821 }
822 
823 
824 /**
825  * This function overrides the driver's create_fs_state() function and
826  * will typically be called by the state tracker.
827  */
828 static void *
aapoint_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * fs)829 aapoint_create_fs_state(struct pipe_context *pipe,
830                        const struct pipe_shader_state *fs)
831 {
832    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
833    struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
834    if (aafs == NULL)
835       return NULL;
836 
837    aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
838 
839    /* pass-through */
840    aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
841 
842    return aafs;
843 }
844 
845 
846 static void
aapoint_bind_fs_state(struct pipe_context * pipe,void * fs)847 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
848 {
849    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
850    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
851    /* save current */
852    aapoint->fs = aafs;
853    /* pass-through */
854    aapoint->driver_bind_fs_state(pipe,
855                                  (aafs ? aafs->driver_fs : NULL));
856 }
857 
858 
859 static void
aapoint_delete_fs_state(struct pipe_context * pipe,void * fs)860 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
861 {
862    struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
863    struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
864 
865    /* pass-through */
866    aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
867 
868    if (aafs->aapoint_fs)
869       aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
870 
871    FREE((void*)aafs->state.tokens);
872 
873    FREE(aafs);
874 }
875 
876 
877 /**
878  * Called by drivers that want to install this AA point prim stage
879  * into the draw module's pipeline.  This will not be used if the
880  * hardware has native support for AA points.
881  */
882 boolean
draw_install_aapoint_stage(struct draw_context * draw,struct pipe_context * pipe)883 draw_install_aapoint_stage(struct draw_context *draw,
884                            struct pipe_context *pipe)
885 {
886    struct aapoint_stage *aapoint;
887 
888    pipe->draw = (void *) draw;
889 
890    /*
891     * Create / install AA point drawing / prim stage
892     */
893    aapoint = draw_aapoint_stage( draw );
894    if (aapoint == NULL)
895       return FALSE;
896 
897    /* save original driver functions */
898    aapoint->driver_create_fs_state = pipe->create_fs_state;
899    aapoint->driver_bind_fs_state = pipe->bind_fs_state;
900    aapoint->driver_delete_fs_state = pipe->delete_fs_state;
901 
902    /* override the driver's functions */
903    pipe->create_fs_state = aapoint_create_fs_state;
904    pipe->bind_fs_state = aapoint_bind_fs_state;
905    pipe->delete_fs_state = aapoint_delete_fs_state;
906 
907    draw->pipeline.aapoint = &aapoint->stage;
908 
909    return TRUE;
910 }
911