1 /**************************************************************************
2 *
3 * Copyright 2007 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keithw@vmware.com>
31 */
32
33 #include "util/u_memory.h"
34 #include "util/u_math.h"
35 #include "draw/draw_context.h"
36 #include "draw/draw_private.h"
37 #include "draw/draw_vbuf.h"
38 #include "draw/draw_vertex.h"
39 #include "draw/draw_vs.h"
40 #include "translate/translate.h"
41
/* A first pass at incorporating vertex fetch/emit functionality into
 * the vertex shader path: the shader is wrapped with free-standing
 * translate-based fetch and emit stages.
 */
/* Generic (non-specialized) vertex shader variant: pairs the shader
 * with separate translate objects for vertex fetch and emit.
 */
struct draw_vs_variant_generic {
   struct draw_vs_variant base;

   struct draw_vertex_shader *shader;
   struct draw_context *draw;

   /* Basic plan is to run these two translate functions before/after
    * the vertex shader's existing run_linear() routine to simulate
    * the inclusion of this functionality into the shader...
    *
    * Next will look at actually including it.
    */
   struct translate *fetch;   /* vertex buffers -> float[4] temp buffer */
   struct translate *emit;    /* temp buffer -> hardware vertex layout */

   unsigned temp_vertex_stride;   /* bytes per vertex in the temp buffer */
};
61
62
63
64
65
vsvg_set_buffer(struct draw_vs_variant * variant,unsigned buffer,const void * ptr,unsigned stride,unsigned max_index)66 static void vsvg_set_buffer( struct draw_vs_variant *variant,
67 unsigned buffer,
68 const void *ptr,
69 unsigned stride,
70 unsigned max_index )
71 {
72 struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
73
74 vsvg->fetch->set_buffer(vsvg->fetch,
75 buffer,
76 ptr,
77 stride,
78 max_index );
79 }
80
81 static const struct pipe_viewport_state *
find_viewport(struct draw_context * draw,char * buffer,unsigned vertex_idx,unsigned stride)82 find_viewport(struct draw_context *draw,
83 char *buffer,
84 unsigned vertex_idx,
85 unsigned stride)
86 {
87 int viewport_index_output =
88 draw_current_shader_viewport_index_output(draw);
89 char *ptr = buffer + vertex_idx * stride;
90 unsigned *data = (unsigned *)ptr;
91 int viewport_index =
92 draw_current_shader_uses_viewport_index(draw) ?
93 data[viewport_index_output * 4] : 0;
94
95 viewport_index = draw_clamp_viewport_idx(viewport_index);
96
97 return &draw->viewports[viewport_index];
98 }
99
100
101 /* Mainly for debug at this stage:
102 */
do_rhw_viewport(struct draw_vs_variant_generic * vsvg,unsigned count,void * output_buffer)103 static void do_rhw_viewport( struct draw_vs_variant_generic *vsvg,
104 unsigned count,
105 void *output_buffer )
106 {
107 char *ptr = (char *)output_buffer;
108 unsigned stride = vsvg->temp_vertex_stride;
109 unsigned j;
110
111 ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
112
113 for (j = 0; j < count; j++, ptr += stride) {
114 const struct pipe_viewport_state *viewport =
115 find_viewport(vsvg->base.vs->draw, (char*)output_buffer,
116 j, stride);
117 const float *scale = viewport->scale;
118 const float *trans = viewport->translate;
119 float *data = (float *)ptr;
120 float w = 1.0f / data[3];
121
122 data[0] = data[0] * w * scale[0] + trans[0];
123 data[1] = data[1] * w * scale[1] + trans[1];
124 data[2] = data[2] * w * scale[2] + trans[2];
125 data[3] = w;
126 }
127 }
128
do_viewport(struct draw_vs_variant_generic * vsvg,unsigned count,void * output_buffer)129 static void do_viewport( struct draw_vs_variant_generic *vsvg,
130 unsigned count,
131 void *output_buffer )
132 {
133 char *ptr = (char *)output_buffer;
134 unsigned stride = vsvg->temp_vertex_stride;
135 unsigned j;
136
137 ptr += vsvg->base.vs->position_output * 4 * sizeof(float);
138
139 for (j = 0; j < count; j++, ptr += stride) {
140 const struct pipe_viewport_state *viewport =
141 find_viewport(vsvg->base.vs->draw, (char*)output_buffer,
142 j, stride);
143 const float *scale = viewport->scale;
144 const float *trans = viewport->translate;
145 float *data = (float *)ptr;
146
147 data[0] = data[0] * scale[0] + trans[0];
148 data[1] = data[1] * scale[1] + trans[1];
149 data[2] = data[2] * scale[2] + trans[2];
150 }
151 }
152
153
vsvg_run_elts(struct draw_vs_variant * variant,const unsigned * elts,unsigned count,void * output_buffer)154 static void PIPE_CDECL vsvg_run_elts( struct draw_vs_variant *variant,
155 const unsigned *elts,
156 unsigned count,
157 void *output_buffer)
158 {
159 struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
160 unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
161 void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride +
162 DRAW_EXTRA_VERTICES_PADDING );
163
164 if (0) debug_printf("%s %d \n", __FUNCTION__, count);
165
166 /* Want to do this in small batches for cache locality?
167 */
168
169 vsvg->fetch->run_elts( vsvg->fetch,
170 elts,
171 count,
172 vsvg->draw->start_instance,
173 vsvg->draw->instance_id,
174 temp_buffer );
175
176 vsvg->base.vs->run_linear( vsvg->base.vs,
177 temp_buffer,
178 temp_buffer,
179 vsvg->base.vs->draw->pt.user.vs_constants,
180 vsvg->base.vs->draw->pt.user.vs_constants_size,
181 count,
182 temp_vertex_stride,
183 temp_vertex_stride, NULL);
184
185 /* FIXME: geometry shading? */
186
187 if (vsvg->base.key.clip) {
188 /* not really handling clipping, just do the rhw so we can
189 * see the results...
190 */
191 do_rhw_viewport( vsvg,
192 count,
193 temp_buffer );
194 }
195 else if (vsvg->base.key.viewport) {
196 do_viewport( vsvg,
197 count,
198 temp_buffer );
199 }
200
201
202 vsvg->emit->set_buffer( vsvg->emit,
203 0,
204 temp_buffer,
205 temp_vertex_stride,
206 ~0 );
207
208 vsvg->emit->set_buffer( vsvg->emit,
209 1,
210 &vsvg->draw->rasterizer->point_size,
211 0,
212 ~0 );
213
214 vsvg->emit->run( vsvg->emit,
215 0, count,
216 vsvg->draw->start_instance,
217 vsvg->draw->instance_id,
218 output_buffer );
219
220 FREE(temp_buffer);
221 }
222
223
vsvg_run_linear(struct draw_vs_variant * variant,unsigned start,unsigned count,void * output_buffer)224 static void PIPE_CDECL vsvg_run_linear( struct draw_vs_variant *variant,
225 unsigned start,
226 unsigned count,
227 void *output_buffer )
228 {
229 struct draw_vs_variant_generic *vsvg = (struct draw_vs_variant_generic *)variant;
230 unsigned temp_vertex_stride = vsvg->temp_vertex_stride;
231 void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride +
232 DRAW_EXTRA_VERTICES_PADDING );
233
234 if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count,
235 vsvg->base.key.output_stride,
236 temp_vertex_stride);
237
238 vsvg->fetch->run( vsvg->fetch,
239 start,
240 count,
241 vsvg->draw->start_instance,
242 vsvg->draw->instance_id,
243 temp_buffer );
244
245 vsvg->base.vs->run_linear( vsvg->base.vs,
246 temp_buffer,
247 temp_buffer,
248 vsvg->base.vs->draw->pt.user.vs_constants,
249 vsvg->base.vs->draw->pt.user.vs_constants_size,
250 count,
251 temp_vertex_stride,
252 temp_vertex_stride, NULL);
253
254 if (vsvg->base.key.clip) {
255 /* not really handling clipping, just do the rhw so we can
256 * see the results...
257 */
258 do_rhw_viewport( vsvg,
259 count,
260 temp_buffer );
261 }
262 else if (vsvg->base.key.viewport) {
263 do_viewport( vsvg,
264 count,
265 temp_buffer );
266 }
267
268 vsvg->emit->set_buffer( vsvg->emit,
269 0,
270 temp_buffer,
271 temp_vertex_stride,
272 ~0 );
273
274 vsvg->emit->set_buffer( vsvg->emit,
275 1,
276 &vsvg->draw->rasterizer->point_size,
277 0,
278 ~0 );
279
280 vsvg->emit->run( vsvg->emit,
281 0, count,
282 vsvg->draw->start_instance,
283 vsvg->draw->instance_id,
284 output_buffer );
285
286 FREE(temp_buffer);
287 }
288
289
290
291
292
/* Free the variant struct itself.  The fetch/emit translate objects
 * are not released here — presumably they are cached/owned by the
 * draw context via draw_vs_get_fetch()/draw_vs_get_emit(); confirm
 * against those helpers before changing this.
 */
static void vsvg_destroy( struct draw_vs_variant *variant )
{
   FREE(variant);
}
297
298
299 struct draw_vs_variant *
draw_vs_create_variant_generic(struct draw_vertex_shader * vs,const struct draw_vs_variant_key * key)300 draw_vs_create_variant_generic( struct draw_vertex_shader *vs,
301 const struct draw_vs_variant_key *key )
302 {
303 unsigned i;
304 struct translate_key fetch, emit;
305
306 struct draw_vs_variant_generic *vsvg = CALLOC_STRUCT( draw_vs_variant_generic );
307 if (!vsvg)
308 return NULL;
309
310 vsvg->base.key = *key;
311 vsvg->base.vs = vs;
312 vsvg->base.set_buffer = vsvg_set_buffer;
313 vsvg->base.run_elts = vsvg_run_elts;
314 vsvg->base.run_linear = vsvg_run_linear;
315 vsvg->base.destroy = vsvg_destroy;
316
317 vsvg->draw = vs->draw;
318
319 vsvg->temp_vertex_stride = MAX2(key->nr_inputs,
320 draw_total_vs_outputs(vs->draw)) * 4 * sizeof(float);
321
322 /* Build free-standing fetch and emit functions:
323 */
324 fetch.nr_elements = key->nr_inputs;
325 fetch.output_stride = vsvg->temp_vertex_stride;
326 for (i = 0; i < key->nr_inputs; i++) {
327 fetch.element[i].type = TRANSLATE_ELEMENT_NORMAL;
328 fetch.element[i].input_format = key->element[i].in.format;
329 fetch.element[i].input_buffer = key->element[i].in.buffer;
330 fetch.element[i].input_offset = key->element[i].in.offset;
331 fetch.element[i].instance_divisor = 0;
332 fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
333 fetch.element[i].output_offset = i * 4 * sizeof(float);
334 assert(fetch.element[i].output_offset < fetch.output_stride);
335 }
336
337
338 emit.nr_elements = key->nr_outputs;
339 emit.output_stride = key->output_stride;
340 for (i = 0; i < key->nr_outputs; i++) {
341 if (key->element[i].out.format != EMIT_1F_PSIZE)
342 {
343 emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
344 emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
345 emit.element[i].input_buffer = 0;
346 emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float);
347 emit.element[i].instance_divisor = 0;
348 emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format);
349 emit.element[i].output_offset = key->element[i].out.offset;
350 assert(emit.element[i].input_offset <= fetch.output_stride);
351 }
352 else {
353 emit.element[i].type = TRANSLATE_ELEMENT_NORMAL;
354 emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT;
355 emit.element[i].input_buffer = 1;
356 emit.element[i].input_offset = 0;
357 emit.element[i].instance_divisor = 0;
358 emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT;
359 emit.element[i].output_offset = key->element[i].out.offset;
360 }
361 }
362
363 vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch );
364 vsvg->emit = draw_vs_get_emit( vs->draw, &emit );
365
366 return &vsvg->base;
367 }
368
369
370
371
372
373