1 /**************************************************************************
2 *
3 * Copyright 2008 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * AA point stage: AA points are converted to quads and rendered with a
30 * special fragment shader. Another approach would be to use a texture
31 * map image of a point, but experiments indicate the quality isn't nearly
32 * as good as this approach.
33 *
34 * Note: this looks a lot like draw_aaline.c but there's actually little
35 * if any code that can be shared.
36 *
37 * Authors: Brian Paul
38 */
39
40
41 #include "pipe/p_context.h"
42 #include "pipe/p_defines.h"
43 #include "pipe/p_shader_tokens.h"
44
45 #include "tgsi/tgsi_transform.h"
46 #include "tgsi/tgsi_dump.h"
47
48 #include "util/u_math.h"
49 #include "util/u_memory.h"
50
51 #include "draw_context.h"
52 #include "draw_vs.h"
53 #include "draw_pipe.h"
54
55 #include "nir.h"
56 #include "nir/nir_draw_helpers.h"
57
/**
 * Approximate number of extra tokens needed for the declarations and
 * instructions this stage adds to the user's fragment shader (see
 * aa_transform_prolog() and aa_transform_epilog()).  It is added to the
 * original token count when sizing the transformed shader.
 */
#define NUM_NEW_TOKENS 200
60
61
/*
 * Enabling NORMALIZE might give _slightly_ better results.
 * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or
 * d=x*x+y*y.  Since we're working with a unit circle, the latter seems
 * close enough and saves some costly instructions.
 *
 * The setting affects both the generated TGSI (aa_transform_prolog) and
 * the threshold 'k' computed per point (aapoint_point), so the two stay
 * consistent with each other.
 */
#define NORMALIZE 0
69
70
/**
 * Subclass of pipe_shader_state to carry extra fragment shader info.
 *
 * One of these is created for every fragment shader the frontend creates
 * (see aapoint_create_fs_state) and is what the frontend holds as its
 * shader handle while this stage is installed.
 */
struct aapoint_fragment_shader
{
   struct pipe_shader_state state;  /**< private copy of the shader IR (TGSI tokens or NIR) */
   void *driver_fs;                 /**< the regular shader */
   void *aapoint_fs;                /**< the aa point-augmented shader; NULL until first needed */
   int generic_attrib;              /**< The generic input attrib/texcoord we'll use */
};
81
82
/**
 * Subclass of draw_stage
 *
 * Holds the per-batch point parameters, the currently bound fragment
 * shader wrapper, and the driver's original fragment-shader entry points
 * that this stage overrides.
 */
struct aapoint_stage
{
   /* base class; aapoint_stage() casts a draw_stage pointer to this type */
   struct draw_stage stage;

   /**
    * half of pipe_rasterizer_state::point_size
    * (aapoint_first_point() uses 1.0 when point_size <= 2.0)
    */
   float radius;

   /** vertex attrib slot containing point size, or -1 if none is used */
   int psize_slot;

   /**
    * this is the vertex attrib slot for the new texcoords
    * (set to -1 when no AA point shader exists)
    */
   uint tex_slot;

   /** vertex attrib slot containing position */
   uint pos_slot;

   /** Currently bound fragment shader (may be NULL) */
   struct aapoint_fragment_shader *fs;

   /*
    * Driver interface/override functions
    *
    * These are the driver's original entry points, saved by
    * draw_install_aapoint_stage() and restored by aapoint_destroy().
    */
   void * (*driver_create_fs_state)(struct pipe_context *,
                                    const struct pipe_shader_state *);
   void (*driver_bind_fs_state)(struct pipe_context *, void *);
   void (*driver_delete_fs_state)(struct pipe_context *, void *);
};
113
114
115
/**
 * Subclass of tgsi_transform_context, used for transforming the
 * user's fragment shader to add the special AA instructions.
 *
 * The integer fields are initialized to -1 by generate_aapoint_fs() and
 * filled in while the shader's declarations are scanned.
 */
struct aa_transform_context {
   struct tgsi_transform_context base;  /* base class; callbacks cast back from it */
   uint tempsUsed;  /**< bitmask: bit i is set when temporary i is declared */
   int colorOutput; /**< which output is the primary color (COLOR semantic, index 0) */
   int maxInput, maxGeneric;  /**< max input index found / max GENERIC semantic index found */
   int tmp0, colorTemp;  /**< temp registers: coverage scratch / color stand-in */
};
127
128
129 /**
130 * TGSI declaration transform callback.
131 * Look for two free temp regs and available input reg for new texcoords.
132 */
133 static void
aa_transform_decl(struct tgsi_transform_context * ctx,struct tgsi_full_declaration * decl)134 aa_transform_decl(struct tgsi_transform_context *ctx,
135 struct tgsi_full_declaration *decl)
136 {
137 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
138
139 if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
140 decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
141 decl->Semantic.Index == 0) {
142 aactx->colorOutput = decl->Range.First;
143 }
144 else if (decl->Declaration.File == TGSI_FILE_INPUT) {
145 if ((int) decl->Range.Last > aactx->maxInput)
146 aactx->maxInput = decl->Range.Last;
147 if (decl->Semantic.Name == TGSI_SEMANTIC_GENERIC &&
148 (int) decl->Semantic.Index > aactx->maxGeneric) {
149 aactx->maxGeneric = decl->Semantic.Index;
150 }
151 }
152 else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
153 uint i;
154 for (i = decl->Range.First;
155 i <= decl->Range.Last; i++) {
156 aactx->tempsUsed |= (1 << i);
157 }
158 }
159
160 ctx->emit_declaration(ctx, decl);
161 }
162
163
164 /**
165 * TGSI transform callback.
166 * Insert new declarations and instructions before first instruction.
167 */
168 static void
aa_transform_prolog(struct tgsi_transform_context * ctx)169 aa_transform_prolog(struct tgsi_transform_context *ctx)
170 {
171 /* emit our new declarations before the first instruction */
172 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
173 struct tgsi_full_instruction newInst;
174 const int texInput = aactx->maxInput + 1;
175 int tmp0;
176 uint i;
177
178 /* find two free temp regs */
179 for (i = 0; i < 32; i++) {
180 if ((aactx->tempsUsed & (1u << i)) == 0) {
181 /* found a free temp */
182 if (aactx->tmp0 < 0)
183 aactx->tmp0 = i;
184 else if (aactx->colorTemp < 0)
185 aactx->colorTemp = i;
186 else
187 break;
188 }
189 }
190
191 assert(aactx->colorTemp != aactx->tmp0);
192
193 tmp0 = aactx->tmp0;
194
195 /* declare new generic input/texcoord */
196 tgsi_transform_input_decl(ctx, texInput,
197 TGSI_SEMANTIC_GENERIC, aactx->maxGeneric + 1,
198 TGSI_INTERPOLATE_LINEAR);
199
200 /* declare new temp regs */
201 tgsi_transform_temp_decl(ctx, tmp0);
202 tgsi_transform_temp_decl(ctx, aactx->colorTemp);
203
204 /*
205 * Emit code to compute fragment coverage, kill if outside point radius
206 *
207 * Temp reg0 usage:
208 * t0.x = distance of fragment from center point
209 * t0.y = boolean, is t0.x > 1.0, also misc temp usage
210 * t0.z = temporary for computing 1/(1-k) value
211 * t0.w = final coverage value
212 */
213
214 /* MUL t0.xy, tex, tex; # compute x^2, y^2 */
215 tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
216 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
217 TGSI_FILE_INPUT, texInput,
218 TGSI_FILE_INPUT, texInput, false);
219
220 /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */
221 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
222 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
223 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
224 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, false);
225
226 #if NORMALIZE /* OPTIONAL normalization of length */
227 /* RSQ t0.x, t0.x; */
228 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RSQ,
229 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
230 TGSI_FILE_TEMPORARY, tmp0);
231
232 /* RCP t0.x, t0.x; */
233 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_RCP,
234 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
235 TGSI_FILE_TEMPORARY, tmp0);
236 #endif
237
238 /* SGT t0.y, t0.xxxx, tex.wwww; # bool b = d > 1 (NOTE tex.w == 1) */
239 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SGT,
240 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
241 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
242 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W, false);
243
244 /* KILL_IF -tmp0.yyyy; # if -tmp0.y < 0, KILL */
245 tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
246 TGSI_SWIZZLE_Y, TRUE);
247
248 /* compute coverage factor = (1-d)/(1-k) */
249
250 /* SUB t0.z, tex.w, tex.z; # m = 1 - k */
251 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
252 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Z,
253 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
254 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, true);
255
256 /* RCP t0.z, t0.z; # t0.z = 1 / m */
257 newInst = tgsi_default_full_instruction();
258 newInst.Instruction.Opcode = TGSI_OPCODE_RCP;
259 newInst.Instruction.NumDstRegs = 1;
260 newInst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
261 newInst.Dst[0].Register.Index = tmp0;
262 newInst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_Z;
263 newInst.Instruction.NumSrcRegs = 1;
264 newInst.Src[0].Register.File = TGSI_FILE_TEMPORARY;
265 newInst.Src[0].Register.Index = tmp0;
266 newInst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_Z;
267 ctx->emit_instruction(ctx, &newInst);
268
269 /* SUB t0.y, 1, t0.x; # d = 1 - d */
270 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_ADD,
271 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
272 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
273 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X, true);
274
275 /* MUL t0.w, t0.y, t0.z; # coverage = d * m */
276 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
277 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
278 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
279 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Z, false);
280
281 /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */
282 tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SLE,
283 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
284 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X,
285 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_Z, false);
286
287 /* CMP t0.w, -t0.y, tex.w, t0.w;
288 * # if -t0.y < 0 then
289 * t0.w = 1
290 * else
291 * t0.w = t0.w
292 */
293 tgsi_transform_op3_swz_inst(ctx, TGSI_OPCODE_CMP,
294 TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
295 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y, 1,
296 TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W,
297 TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
298 }
299
300
301 /**
302 * TGSI transform callback.
303 * Insert new instructions before the END instruction.
304 */
305 static void
aa_transform_epilog(struct tgsi_transform_context * ctx)306 aa_transform_epilog(struct tgsi_transform_context *ctx)
307 {
308 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
309
310 /* add alpha modulation code at tail of program */
311
312 /* MOV result.color.xyz, colorTemp; */
313 tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
314 TGSI_FILE_OUTPUT, aactx->colorOutput,
315 TGSI_WRITEMASK_XYZ,
316 TGSI_FILE_TEMPORARY, aactx->colorTemp);
317
318 /* MUL result.color.w, colorTemp, tmp0.w; */
319 tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
320 TGSI_FILE_OUTPUT, aactx->colorOutput,
321 TGSI_WRITEMASK_W,
322 TGSI_FILE_TEMPORARY, aactx->colorTemp,
323 TGSI_FILE_TEMPORARY, aactx->tmp0, false);
324 }
325
326
327 /**
328 * TGSI transform callback.
329 * Called per instruction.
330 * Replace writes to result.color w/ a temp reg.
331 */
332 static void
aa_transform_inst(struct tgsi_transform_context * ctx,struct tgsi_full_instruction * inst)333 aa_transform_inst(struct tgsi_transform_context *ctx,
334 struct tgsi_full_instruction *inst)
335 {
336 struct aa_transform_context *aactx = (struct aa_transform_context *) ctx;
337 unsigned i;
338
339 /* Not an END instruction.
340 * Look for writes to result.color and replace with colorTemp reg.
341 */
342 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
343 struct tgsi_full_dst_register *dst = &inst->Dst[i];
344 if (dst->Register.File == TGSI_FILE_OUTPUT &&
345 dst->Register.Index == aactx->colorOutput) {
346 dst->Register.File = TGSI_FILE_TEMPORARY;
347 dst->Register.Index = aactx->colorTemp;
348 }
349 }
350
351 ctx->emit_instruction(ctx, inst);
352 }
353
354
355 /**
356 * Generate the frag shader we'll use for drawing AA points.
357 * This will be the user's shader plus some texture/modulate instructions.
358 */
359 static boolean
generate_aapoint_fs(struct aapoint_stage * aapoint)360 generate_aapoint_fs(struct aapoint_stage *aapoint)
361 {
362 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
363 struct pipe_shader_state aapoint_fs;
364 struct aa_transform_context transform;
365 const uint newLen = tgsi_num_tokens(orig_fs->tokens) + NUM_NEW_TOKENS;
366 struct pipe_context *pipe = aapoint->stage.draw->pipe;
367
368 aapoint_fs = *orig_fs; /* copy to init */
369
370 assert(aapoint_fs.type == PIPE_SHADER_IR_TGSI);
371
372 memset(&transform, 0, sizeof(transform));
373 transform.colorOutput = -1;
374 transform.maxInput = -1;
375 transform.maxGeneric = -1;
376 transform.colorTemp = -1;
377 transform.tmp0 = -1;
378 transform.base.prolog = aa_transform_prolog;
379 transform.base.epilog = aa_transform_epilog;
380 transform.base.transform_instruction = aa_transform_inst;
381 transform.base.transform_declaration = aa_transform_decl;
382
383 aapoint_fs.tokens = tgsi_transform_shader(orig_fs->tokens, newLen, &transform.base);
384 if (!aapoint_fs.tokens)
385 return false;
386
387 #if 0 /* DEBUG */
388 debug_printf("draw_aapoint, orig shader:\n");
389 tgsi_dump(orig_fs->tokens, 0);
390 debug_printf("draw_aapoint, new shader:\n");
391 tgsi_dump(aapoint_fs.tokens, 0);
392 #endif
393
394 aapoint->fs->aapoint_fs
395 = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
396 if (aapoint->fs->aapoint_fs == NULL)
397 goto fail;
398
399 aapoint->fs->generic_attrib = transform.maxGeneric + 1;
400 FREE((void *)aapoint_fs.tokens);
401 return TRUE;
402
403 fail:
404 FREE((void *)aapoint_fs.tokens);
405 return FALSE;
406 }
407
408 static boolean
generate_aapoint_fs_nir(struct aapoint_stage * aapoint)409 generate_aapoint_fs_nir(struct aapoint_stage *aapoint)
410 {
411 struct pipe_context *pipe = aapoint->stage.draw->pipe;
412 const struct pipe_shader_state *orig_fs = &aapoint->fs->state;
413 struct pipe_shader_state aapoint_fs;
414
415 aapoint_fs = *orig_fs; /* copy to init */
416 aapoint_fs.ir.nir = nir_shader_clone(NULL, orig_fs->ir.nir);
417 if (!aapoint_fs.ir.nir)
418 return FALSE;
419
420 nir_lower_aapoint_fs(aapoint_fs.ir.nir, &aapoint->fs->generic_attrib);
421 aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(pipe, &aapoint_fs);
422 if (aapoint->fs->aapoint_fs == NULL)
423 goto fail;
424
425 return TRUE;
426
427 fail:
428 return FALSE;
429 }
430
431 /**
432 * When we're about to draw our first AA point in a batch, this function is
433 * called to tell the driver to bind our modified fragment shader.
434 */
435 static boolean
bind_aapoint_fragment_shader(struct aapoint_stage * aapoint)436 bind_aapoint_fragment_shader(struct aapoint_stage *aapoint)
437 {
438 struct draw_context *draw = aapoint->stage.draw;
439 struct pipe_context *pipe = draw->pipe;
440
441 if (!aapoint->fs->aapoint_fs) {
442 if (aapoint->fs->state.type == PIPE_SHADER_IR_NIR) {
443 if (!generate_aapoint_fs_nir(aapoint))
444 return FALSE;
445 } else if (!generate_aapoint_fs(aapoint))
446 return FALSE;
447 }
448
449 draw->suspend_flushing = TRUE;
450 aapoint->driver_bind_fs_state(pipe, aapoint->fs->aapoint_fs);
451 draw->suspend_flushing = FALSE;
452
453 return TRUE;
454 }
455
456
457
/**
 * Cast a generic draw_stage pointer to the aapoint_stage it is embedded in.
 */
static inline struct aapoint_stage *
aapoint_stage(struct draw_stage *stage)
{
   struct aapoint_stage *aapoint = (struct aapoint_stage *) stage;

   return aapoint;
}
463
464
465
466
467 /**
468 * Draw an AA point by drawing a quad.
469 */
470 static void
aapoint_point(struct draw_stage * stage,struct prim_header * header)471 aapoint_point(struct draw_stage *stage, struct prim_header *header)
472 {
473 const struct aapoint_stage *aapoint = aapoint_stage(stage);
474 struct prim_header tri;
475 struct vertex_header *v[4];
476 const uint tex_slot = aapoint->tex_slot;
477 const uint pos_slot = aapoint->pos_slot;
478 float radius, *pos, *tex;
479 uint i;
480 float k;
481
482 if (aapoint->psize_slot >= 0) {
483 radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0];
484 }
485 else {
486 radius = aapoint->radius;
487 }
488
489 /*
490 * Note: the texcoords (generic attrib, really) we use are special:
491 * The S and T components simply vary from -1 to +1.
492 * The R component is k, below.
493 * The Q component is 1.0 and will used as a handy constant in the
494 * fragment shader.
495 */
496
497 /*
498 * k is the threshold distance from the point's center at which
499 * we begin alpha attenuation (the coverage value).
500 * Operating within a unit circle, we'll compute the fragment's
501 * distance 'd' from the center point using the texcoords.
502 * IF d > 1.0 THEN
503 * KILL fragment
504 * ELSE IF d > k THEN
505 * compute coverage in [0,1] proportional to d in [k, 1].
506 * ELSE
507 * coverage = 1.0; // full coverage
508 * ENDIF
509 *
510 * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to
511 * avoid using IF/ELSE/ENDIF TGSI opcodes.
512 */
513
514 #if !NORMALIZE
515 k = 1.0f / radius;
516 k = 1.0f - 2.0f * k + k * k;
517 #else
518 k = 1.0f - 1.0f / radius;
519 #endif
520
521 /* allocate/dup new verts */
522 for (i = 0; i < 4; i++) {
523 v[i] = dup_vert(stage, header->v[0], i);
524 }
525
526 /* new verts */
527 pos = v[0]->data[pos_slot];
528 pos[0] -= radius;
529 pos[1] -= radius;
530
531 pos = v[1]->data[pos_slot];
532 pos[0] += radius;
533 pos[1] -= radius;
534
535 pos = v[2]->data[pos_slot];
536 pos[0] += radius;
537 pos[1] += radius;
538
539 pos = v[3]->data[pos_slot];
540 pos[0] -= radius;
541 pos[1] += radius;
542
543 /* new texcoords */
544 tex = v[0]->data[tex_slot];
545 ASSIGN_4V(tex, -1, -1, k, 1);
546
547 tex = v[1]->data[tex_slot];
548 ASSIGN_4V(tex, 1, -1, k, 1);
549
550 tex = v[2]->data[tex_slot];
551 ASSIGN_4V(tex, 1, 1, k, 1);
552
553 tex = v[3]->data[tex_slot];
554 ASSIGN_4V(tex, -1, 1, k, 1);
555
556 /* emit 2 tris for the quad strip */
557 tri.v[0] = v[0];
558 tri.v[1] = v[1];
559 tri.v[2] = v[2];
560 stage->next->tri( stage->next, &tri );
561
562 tri.v[0] = v[0];
563 tri.v[1] = v[2];
564 tri.v[2] = v[3];
565 stage->next->tri( stage->next, &tri );
566 }
567
568
569 static void
aapoint_first_point(struct draw_stage * stage,struct prim_header * header)570 aapoint_first_point(struct draw_stage *stage, struct prim_header *header)
571 {
572 auto struct aapoint_stage *aapoint = aapoint_stage(stage);
573 struct draw_context *draw = stage->draw;
574 struct pipe_context *pipe = draw->pipe;
575 const struct pipe_rasterizer_state *rast = draw->rasterizer;
576 void *r;
577
578 assert(draw->rasterizer->point_smooth && !draw->rasterizer->multisample);
579
580 if (draw->rasterizer->point_size <= 2.0)
581 aapoint->radius = 1.0;
582 else
583 aapoint->radius = 0.5f * draw->rasterizer->point_size;
584
585 /*
586 * Bind (generate) our fragprog.
587 */
588 bind_aapoint_fragment_shader(aapoint);
589
590 draw_aapoint_prepare_outputs(draw, draw->pipeline.aapoint);
591
592 draw->suspend_flushing = TRUE;
593
594 /* Disable triangle culling, stippling, unfilled mode etc. */
595 r = draw_get_rasterizer_no_cull(draw, rast);
596 pipe->bind_rasterizer_state(pipe, r);
597
598 draw->suspend_flushing = FALSE;
599
600 /* now really draw first point */
601 stage->point = aapoint_point;
602 stage->point(stage, header);
603 }
604
605
606 static void
aapoint_flush(struct draw_stage * stage,unsigned flags)607 aapoint_flush(struct draw_stage *stage, unsigned flags)
608 {
609 struct draw_context *draw = stage->draw;
610 struct aapoint_stage *aapoint = aapoint_stage(stage);
611 struct pipe_context *pipe = draw->pipe;
612
613 stage->point = aapoint_first_point;
614 stage->next->flush( stage->next, flags );
615
616 /* restore original frag shader */
617 draw->suspend_flushing = TRUE;
618 aapoint->driver_bind_fs_state(pipe, aapoint->fs ? aapoint->fs->driver_fs : NULL);
619
620 /* restore original rasterizer state */
621 if (draw->rast_handle) {
622 pipe->bind_rasterizer_state(pipe, draw->rast_handle);
623 }
624
625 draw->suspend_flushing = FALSE;
626
627 draw_remove_extra_vertex_attribs(draw);
628 }
629
630
631 static void
aapoint_reset_stipple_counter(struct draw_stage * stage)632 aapoint_reset_stipple_counter(struct draw_stage *stage)
633 {
634 stage->next->reset_stipple_counter( stage->next );
635 }
636
637
638 static void
aapoint_destroy(struct draw_stage * stage)639 aapoint_destroy(struct draw_stage *stage)
640 {
641 struct aapoint_stage* aapoint = aapoint_stage(stage);
642 struct pipe_context *pipe = stage->draw->pipe;
643
644 draw_free_temp_verts( stage );
645
646 /* restore the old entry points */
647 pipe->create_fs_state = aapoint->driver_create_fs_state;
648 pipe->bind_fs_state = aapoint->driver_bind_fs_state;
649 pipe->delete_fs_state = aapoint->driver_delete_fs_state;
650
651 FREE( stage );
652 }
653
654 void
draw_aapoint_prepare_outputs(struct draw_context * draw,struct draw_stage * stage)655 draw_aapoint_prepare_outputs(struct draw_context *draw,
656 struct draw_stage *stage)
657 {
658 struct aapoint_stage *aapoint = aapoint_stage(stage);
659 const struct pipe_rasterizer_state *rast = draw->rasterizer;
660
661 /* update vertex attrib info */
662 aapoint->pos_slot = draw_current_shader_position_output(draw);
663
664 if (!rast->point_smooth || rast->multisample)
665 return;
666
667 if (aapoint->fs && aapoint->fs->aapoint_fs) {
668 /* allocate the extra post-transformed vertex attribute */
669 aapoint->tex_slot = draw_alloc_extra_vertex_attrib(draw,
670 TGSI_SEMANTIC_GENERIC,
671 aapoint->fs->generic_attrib);
672 assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */
673 } else
674 aapoint->tex_slot = -1;
675
676 /* find psize slot in post-transform vertex */
677 aapoint->psize_slot = -1;
678 if (draw->rasterizer->point_size_per_vertex) {
679 const struct tgsi_shader_info *info = draw_get_shader_info(draw);
680 uint i;
681 /* find PSIZ vertex output */
682 for (i = 0; i < info->num_outputs; i++) {
683 if (info->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) {
684 aapoint->psize_slot = i;
685 break;
686 }
687 }
688 }
689 }
690
691 static struct aapoint_stage *
draw_aapoint_stage(struct draw_context * draw)692 draw_aapoint_stage(struct draw_context *draw)
693 {
694 struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage);
695 if (!aapoint)
696 goto fail;
697
698 aapoint->stage.draw = draw;
699 aapoint->stage.name = "aapoint";
700 aapoint->stage.next = NULL;
701 aapoint->stage.point = aapoint_first_point;
702 aapoint->stage.line = draw_pipe_passthrough_line;
703 aapoint->stage.tri = draw_pipe_passthrough_tri;
704 aapoint->stage.flush = aapoint_flush;
705 aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter;
706 aapoint->stage.destroy = aapoint_destroy;
707
708 if (!draw_alloc_temp_verts( &aapoint->stage, 4 ))
709 goto fail;
710
711 return aapoint;
712
713 fail:
714 if (aapoint)
715 aapoint->stage.destroy(&aapoint->stage);
716
717 return NULL;
718
719 }
720
721
722 static struct aapoint_stage *
aapoint_stage_from_pipe(struct pipe_context * pipe)723 aapoint_stage_from_pipe(struct pipe_context *pipe)
724 {
725 struct draw_context *draw = (struct draw_context *) pipe->draw;
726 return aapoint_stage(draw->pipeline.aapoint);
727 }
728
729
730 /**
731 * This function overrides the driver's create_fs_state() function and
732 * will typically be called by the gallium frontend.
733 */
734 static void *
aapoint_create_fs_state(struct pipe_context * pipe,const struct pipe_shader_state * fs)735 aapoint_create_fs_state(struct pipe_context *pipe,
736 const struct pipe_shader_state *fs)
737 {
738 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
739 struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader);
740 if (!aafs)
741 return NULL;
742
743 aafs->state.type = fs->type;
744 if (fs->type == PIPE_SHADER_IR_TGSI)
745 aafs->state.tokens = tgsi_dup_tokens(fs->tokens);
746 else
747 aafs->state.ir.nir = nir_shader_clone(NULL, fs->ir.nir);
748 /* pass-through */
749 aafs->driver_fs = aapoint->driver_create_fs_state(pipe, fs);
750
751 return aafs;
752 }
753
754
755 static void
aapoint_bind_fs_state(struct pipe_context * pipe,void * fs)756 aapoint_bind_fs_state(struct pipe_context *pipe, void *fs)
757 {
758 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
759 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
760 /* save current */
761 aapoint->fs = aafs;
762 /* pass-through */
763 aapoint->driver_bind_fs_state(pipe,
764 (aafs ? aafs->driver_fs : NULL));
765 }
766
767
768 static void
aapoint_delete_fs_state(struct pipe_context * pipe,void * fs)769 aapoint_delete_fs_state(struct pipe_context *pipe, void *fs)
770 {
771 struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe);
772 struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs;
773
774 /* pass-through */
775 aapoint->driver_delete_fs_state(pipe, aafs->driver_fs);
776
777 if (aafs->aapoint_fs)
778 aapoint->driver_delete_fs_state(pipe, aafs->aapoint_fs);
779
780 if (aafs->state.type == PIPE_SHADER_IR_TGSI)
781 FREE((void*)aafs->state.tokens);
782 else
783 ralloc_free(aafs->state.ir.nir);
784
785 FREE(aafs);
786 }
787
788
789 /**
790 * Called by drivers that want to install this AA point prim stage
791 * into the draw module's pipeline. This will not be used if the
792 * hardware has native support for AA points.
793 */
794 boolean
draw_install_aapoint_stage(struct draw_context * draw,struct pipe_context * pipe)795 draw_install_aapoint_stage(struct draw_context *draw,
796 struct pipe_context *pipe)
797 {
798 struct aapoint_stage *aapoint;
799
800 pipe->draw = (void *) draw;
801
802 /*
803 * Create / install AA point drawing / prim stage
804 */
805 aapoint = draw_aapoint_stage( draw );
806 if (!aapoint)
807 return FALSE;
808
809 /* save original driver functions */
810 aapoint->driver_create_fs_state = pipe->create_fs_state;
811 aapoint->driver_bind_fs_state = pipe->bind_fs_state;
812 aapoint->driver_delete_fs_state = pipe->delete_fs_state;
813
814 /* override the driver's functions */
815 pipe->create_fs_state = aapoint_create_fs_state;
816 pipe->bind_fs_state = aapoint_bind_fs_state;
817 pipe->delete_fs_state = aapoint_delete_fs_state;
818
819 draw->pipeline.aapoint = &aapoint->stage;
820
821 return TRUE;
822 }
823