1 /**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_screen.h"
30 #include "pipe/p_context.h"
31 #include "pipe/p_state.h"
32 #include "tgsi/tgsi_ureg.h"
33 #include "tgsi/tgsi_build.h"
34 #include "tgsi/tgsi_from_mesa.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "tgsi/tgsi_sanity.h"
38 #include "util/u_debug.h"
39 #include "util/u_inlines.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_bitmask.h"
43 #include "GL/gl.h"
44 #include "compiler/shader_info.h"
45
46 union tgsi_any_token {
47 struct tgsi_header header;
48 struct tgsi_processor processor;
49 struct tgsi_token token;
50 struct tgsi_property prop;
51 struct tgsi_property_data prop_data;
52 struct tgsi_declaration decl;
53 struct tgsi_declaration_range decl_range;
54 struct tgsi_declaration_dimension decl_dim;
55 struct tgsi_declaration_interp decl_interp;
56 struct tgsi_declaration_image decl_image;
57 struct tgsi_declaration_semantic decl_semantic;
58 struct tgsi_declaration_sampler_view decl_sampler_view;
59 struct tgsi_declaration_array array;
60 struct tgsi_immediate imm;
61 union tgsi_immediate_data imm_data;
62 struct tgsi_instruction insn;
63 struct tgsi_instruction_label insn_label;
64 struct tgsi_instruction_texture insn_texture;
65 struct tgsi_instruction_memory insn_memory;
66 struct tgsi_texture_offset insn_texture_offset;
67 struct tgsi_src_register src;
68 struct tgsi_ind_register ind;
69 struct tgsi_dimension dim;
70 struct tgsi_dst_register dst;
71 unsigned value;
72 };
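/* Note (descriptive, added for readability): every TGSI token is a single
 * 32-bit word, so this union lets the emitter code below write any token
 * kind -- headers, declarations, instructions, operands -- through one
 * common array element type.
 */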
73
74
75 struct ureg_tokens {
76 union tgsi_any_token *tokens;
77 unsigned size;
78 unsigned order;
79 unsigned count;
80 };
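/* Note (descriptive, added for readability): a growable token buffer.
 * "count" tokens are in use out of "size" allocated, where size is kept at
 * (1 << order) and doubled as needed by tokens_expand().  If reallocation
 * ever fails, the buffer is swapped for the static error_tokens[] array
 * and the program is effectively flagged as bad.
 */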
81
82 #define UREG_MAX_INPUT (4 * PIPE_MAX_SHADER_INPUTS)
83 #define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
84 #define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
85 #define UREG_MAX_CONSTANT_RANGE 32
86 #define UREG_MAX_HW_ATOMIC_RANGE 32
87 #define UREG_MAX_IMMEDIATE 4096
88 #define UREG_MAX_ADDR 3
89 #define UREG_MAX_ARRAY_TEMPS 256
90
91 struct const_decl {
92 struct {
93 unsigned first;
94 unsigned last;
95 } constant_range[UREG_MAX_CONSTANT_RANGE];
96 unsigned nr_constant_ranges;
97 };
98
99 struct hw_atomic_decl {
100 struct {
101 unsigned first;
102 unsigned last;
103 unsigned array_id;
104 } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE];
105 unsigned nr_hw_atomic_ranges;
106 };
107
108 #define DOMAIN_DECL 0
109 #define DOMAIN_INSN 1
110
111 struct ureg_program
112 {
113 enum pipe_shader_type processor;
114 bool supports_any_inout_decl_range;
115 int next_shader_processor;
116
117 struct ureg_input_decl {
118 enum tgsi_semantic semantic_name;
119 unsigned semantic_index;
120 enum tgsi_interpolate_mode interp;
121 unsigned char usage_mask;
122 enum tgsi_interpolate_loc interp_location;
123 unsigned first;
124 unsigned last;
125 unsigned array_id;
126 } input[UREG_MAX_INPUT];
127 unsigned nr_inputs, nr_input_regs;
128
129 unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];
130
131 struct {
132 enum tgsi_semantic semantic_name;
133 unsigned semantic_index;
134 } system_value[UREG_MAX_SYSTEM_VALUE];
135 unsigned nr_system_values;
136
137 struct ureg_output_decl {
138 enum tgsi_semantic semantic_name;
139 unsigned semantic_index;
140 unsigned streams;
141 unsigned usage_mask; /* = TGSI_WRITEMASK_* */
142 unsigned first;
143 unsigned last;
144 unsigned array_id;
145 boolean invariant;
146 } output[UREG_MAX_OUTPUT];
147 unsigned nr_outputs, nr_output_regs;
148
149 struct {
150 union {
151 float f[4];
152 unsigned u[4];
153 int i[4];
154 } value;
155 unsigned nr;
156 unsigned type;
157 } immediate[UREG_MAX_IMMEDIATE];
158 unsigned nr_immediates;
159
160 struct ureg_src sampler[PIPE_MAX_SAMPLERS];
161 unsigned nr_samplers;
162
163 struct {
164 unsigned index;
165 enum tgsi_texture_type target;
166 enum tgsi_return_type return_type_x;
167 enum tgsi_return_type return_type_y;
168 enum tgsi_return_type return_type_z;
169 enum tgsi_return_type return_type_w;
170 } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
171 unsigned nr_sampler_views;
172
173 struct {
174 unsigned index;
175 enum tgsi_texture_type target;
176 enum pipe_format format;
177 boolean wr;
178 boolean raw;
179 } image[PIPE_MAX_SHADER_IMAGES];
180 unsigned nr_images;
181
182 struct {
183 unsigned index;
184 bool atomic;
185 } buffer[PIPE_MAX_SHADER_BUFFERS];
186 unsigned nr_buffers;
187
188 struct util_bitmask *free_temps;
189 struct util_bitmask *local_temps;
190 struct util_bitmask *decl_temps;
191 unsigned nr_temps;
192
193 unsigned array_temps[UREG_MAX_ARRAY_TEMPS];
194 unsigned nr_array_temps;
195
196 struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];
197
198 struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS];
199
200 unsigned properties[TGSI_PROPERTY_COUNT];
201
202 unsigned nr_addrs;
203 unsigned nr_instructions;
204
205 struct ureg_tokens domain[2];
206
207 bool use_memory[TGSI_MEMORY_TYPE_COUNT];
208 };
209
210 static union tgsi_any_token error_tokens[32];
211
212 static void tokens_error( struct ureg_tokens *tokens )
213 {
214 if (tokens->tokens && tokens->tokens != error_tokens)
215 FREE(tokens->tokens);
216
217 tokens->tokens = error_tokens;
218 tokens->size = ARRAY_SIZE(error_tokens);
219 tokens->count = 0;
220 }
221
222
223 static void tokens_expand( struct ureg_tokens *tokens,
224 unsigned count )
225 {
226 unsigned old_size = tokens->size * sizeof(unsigned);
227
228 if (tokens->tokens == error_tokens) {
229 return;
230 }
231
232 while (tokens->count + count > tokens->size) {
233 tokens->size = (1 << ++tokens->order);
234 }
235
236 tokens->tokens = REALLOC(tokens->tokens,
237 old_size,
238 tokens->size * sizeof(unsigned));
239 if (tokens->tokens == NULL) {
240 tokens_error(tokens);
241 }
242 }
243
244 static void set_bad( struct ureg_program *ureg )
245 {
246 tokens_error(&ureg->domain[0]);
247 }
248
249
250
251 static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
252 unsigned domain,
253 unsigned count )
254 {
255 struct ureg_tokens *tokens = &ureg->domain[domain];
256 union tgsi_any_token *result;
257
258 if (tokens->count + count > tokens->size)
259 tokens_expand(tokens, count);
260
261 result = &tokens->tokens[tokens->count];
262 tokens->count += count;
263 return result;
264 }
265
266
267 static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
268 unsigned domain,
269 unsigned nr )
270 {
271 if (ureg->domain[domain].tokens == error_tokens)
272 return &error_tokens[0];
273
274 return &ureg->domain[domain].tokens[nr];
275 }
276
277
278 void
279 ureg_property(struct ureg_program *ureg, unsigned name, unsigned value)
280 {
281 assert(name < ARRAY_SIZE(ureg->properties));
282 ureg->properties[name] = value;
283 }
284
285 struct ureg_src
286 ureg_DECL_fs_input_centroid_layout(struct ureg_program *ureg,
287 enum tgsi_semantic semantic_name,
288 unsigned semantic_index,
289 enum tgsi_interpolate_mode interp_mode,
290 enum tgsi_interpolate_loc interp_location,
291 unsigned index,
292 unsigned usage_mask,
293 unsigned array_id,
294 unsigned array_size)
295 {
296 unsigned i;
297
298 assert(usage_mask != 0);
299 assert(usage_mask <= TGSI_WRITEMASK_XYZW);
300
301 for (i = 0; i < ureg->nr_inputs; i++) {
302 if (ureg->input[i].semantic_name == semantic_name &&
303 ureg->input[i].semantic_index == semantic_index) {
304 assert(ureg->input[i].interp == interp_mode);
305 assert(ureg->input[i].interp_location == interp_location);
306 if (ureg->input[i].array_id == array_id) {
307 ureg->input[i].usage_mask |= usage_mask;
308 goto out;
309 }
310 assert((ureg->input[i].usage_mask & usage_mask) == 0);
311 }
312 }
313
314 if (ureg->nr_inputs < UREG_MAX_INPUT) {
315 assert(array_size >= 1);
316 ureg->input[i].semantic_name = semantic_name;
317 ureg->input[i].semantic_index = semantic_index;
318 ureg->input[i].interp = interp_mode;
319 ureg->input[i].interp_location = interp_location;
320 ureg->input[i].first = index;
321 ureg->input[i].last = index + array_size - 1;
322 ureg->input[i].array_id = array_id;
323 ureg->input[i].usage_mask = usage_mask;
324 ureg->nr_input_regs = MAX2(ureg->nr_input_regs, index + array_size);
325 ureg->nr_inputs++;
326 } else {
327 set_bad(ureg);
328 }
329
330 out:
331 return ureg_src_array_register(TGSI_FILE_INPUT, ureg->input[i].first,
332 array_id);
333 }
334
335 struct ureg_src
336 ureg_DECL_fs_input_centroid(struct ureg_program *ureg,
337 enum tgsi_semantic semantic_name,
338 unsigned semantic_index,
339 enum tgsi_interpolate_mode interp_mode,
340 enum tgsi_interpolate_loc interp_location,
341 unsigned array_id,
342 unsigned array_size)
343 {
344 return ureg_DECL_fs_input_centroid_layout(ureg,
345 semantic_name, semantic_index, interp_mode,
346 interp_location,
347 ureg->nr_input_regs, TGSI_WRITEMASK_XYZW, array_id, array_size);
348 }
349
350
351 struct ureg_src
352 ureg_DECL_vs_input( struct ureg_program *ureg,
353 unsigned index )
354 {
355 assert(ureg->processor == PIPE_SHADER_VERTEX);
356 assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs));
357
358 ureg->vs_inputs[index/32] |= 1 << (index % 32);
359 return ureg_src_register( TGSI_FILE_INPUT, index );
360 }
361
362
363 struct ureg_src
364 ureg_DECL_input_layout(struct ureg_program *ureg,
365 enum tgsi_semantic semantic_name,
366 unsigned semantic_index,
367 unsigned index,
368 unsigned usage_mask,
369 unsigned array_id,
370 unsigned array_size)
371 {
372 return ureg_DECL_fs_input_centroid_layout(ureg,
373 semantic_name, semantic_index,
374 TGSI_INTERPOLATE_CONSTANT, TGSI_INTERPOLATE_LOC_CENTER,
375 index, usage_mask, array_id, array_size);
376 }
377
378
379 struct ureg_src
380 ureg_DECL_input(struct ureg_program *ureg,
381 enum tgsi_semantic semantic_name,
382 unsigned semantic_index,
383 unsigned array_id,
384 unsigned array_size)
385 {
386 return ureg_DECL_fs_input_centroid(ureg, semantic_name, semantic_index,
387 TGSI_INTERPOLATE_CONSTANT,
388 TGSI_INTERPOLATE_LOC_CENTER,
389 array_id, array_size);
390 }
391
392
393 struct ureg_src
394 ureg_DECL_system_value(struct ureg_program *ureg,
395 enum tgsi_semantic semantic_name,
396 unsigned semantic_index)
397 {
398 unsigned i;
399
400 for (i = 0; i < ureg->nr_system_values; i++) {
401 if (ureg->system_value[i].semantic_name == semantic_name &&
402 ureg->system_value[i].semantic_index == semantic_index) {
403 goto out;
404 }
405 }
406
407 if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
408 ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
409 ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
410 i = ureg->nr_system_values;
411 ureg->nr_system_values++;
412 } else {
413 set_bad(ureg);
414 }
415
416 out:
417 return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
418 }
419
420
421 struct ureg_dst
422 ureg_DECL_output_layout(struct ureg_program *ureg,
423 enum tgsi_semantic semantic_name,
424 unsigned semantic_index,
425 unsigned streams,
426 unsigned index,
427 unsigned usage_mask,
428 unsigned array_id,
429 unsigned array_size,
430 boolean invariant)
431 {
432 unsigned i;
433
434 assert(usage_mask != 0);
435 assert(!(streams & 0x03) || (usage_mask & 1));
436 assert(!(streams & 0x0c) || (usage_mask & 2));
437 assert(!(streams & 0x30) || (usage_mask & 4));
438 assert(!(streams & 0xc0) || (usage_mask & 8));
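   /* Note (added for readability): "streams" packs a 2-bit vertex-stream
    * index per component (X in bits 0-1, Y in 2-3, Z in 4-5, W in 6-7), so
    * a component may only be routed to a stream if it is also written
    * according to usage_mask.
    */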
439
440 for (i = 0; i < ureg->nr_outputs; i++) {
441 if (ureg->output[i].semantic_name == semantic_name &&
442 ureg->output[i].semantic_index == semantic_index) {
443 if (ureg->output[i].array_id == array_id) {
444 ureg->output[i].usage_mask |= usage_mask;
445 goto out;
446 }
447 assert((ureg->output[i].usage_mask & usage_mask) == 0);
448 }
449 }
450
451 if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
452 ureg->output[i].semantic_name = semantic_name;
453 ureg->output[i].semantic_index = semantic_index;
454 ureg->output[i].usage_mask = usage_mask;
455 ureg->output[i].first = index;
456 ureg->output[i].last = index + array_size - 1;
457 ureg->output[i].array_id = array_id;
458 ureg->output[i].invariant = invariant;
459 ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size);
460 ureg->nr_outputs++;
461 }
462 else {
463 set_bad( ureg );
464 i = 0;
465 }
466
467 out:
468 ureg->output[i].streams |= streams;
469
470 return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first,
471 array_id);
472 }
473
474
475 struct ureg_dst
476 ureg_DECL_output_masked(struct ureg_program *ureg,
477 unsigned name,
478 unsigned index,
479 unsigned usage_mask,
480 unsigned array_id,
481 unsigned array_size)
482 {
483 return ureg_DECL_output_layout(ureg, name, index, 0,
484 ureg->nr_output_regs, usage_mask, array_id,
485 array_size, FALSE);
486 }
487
488
489 struct ureg_dst
490 ureg_DECL_output(struct ureg_program *ureg,
491 enum tgsi_semantic name,
492 unsigned index)
493 {
494 return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW,
495 0, 1);
496 }
497
498 struct ureg_dst
499 ureg_DECL_output_array(struct ureg_program *ureg,
500 enum tgsi_semantic semantic_name,
501 unsigned semantic_index,
502 unsigned array_id,
503 unsigned array_size)
504 {
505 return ureg_DECL_output_masked(ureg, semantic_name, semantic_index,
506 TGSI_WRITEMASK_XYZW,
507 array_id, array_size);
508 }
509
510
511 /* Record a range of constant registers.  Keep track of which have been
512 * referred to so that we can emit decls later.
513 *
514 * Constant operands declared with this function must be addressed
515 * with a two-dimensional index.
516 *
517 * There is nothing in this code to bind this constant to any tracked
518 * value or manage any constant_buffer contents -- that's the
519 * responsibility of the calling code.
520 */
521 void
522 ureg_DECL_constant2D(struct ureg_program *ureg,
523 unsigned first,
524 unsigned last,
525 unsigned index2D)
526 {
527 struct const_decl *decl = &ureg->const_decls[index2D];
528
529 assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);
530
531 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
532 uint i = decl->nr_constant_ranges++;
533
534 decl->constant_range[i].first = first;
535 decl->constant_range[i].last = last;
536 }
537 }
538
539
540 /* A one-dimensional, deprecated version of ureg_DECL_constant2D().
541 *
542 * Constant operands declared with this function must be addressed
543 * with a one-dimensional index.
544 */
545 struct ureg_src
546 ureg_DECL_constant(struct ureg_program *ureg,
547 unsigned index)
548 {
549 struct const_decl *decl = &ureg->const_decls[0];
550 unsigned minconst = index, maxconst = index;
551 unsigned i;
552
553 /* Inside existing range?
554 */
555 for (i = 0; i < decl->nr_constant_ranges; i++) {
556 if (decl->constant_range[i].first <= index &&
557 decl->constant_range[i].last >= index) {
558 goto out;
559 }
560 }
561
562 /* Extend existing range?
563 */
564 for (i = 0; i < decl->nr_constant_ranges; i++) {
565 if (decl->constant_range[i].last == index - 1) {
566 decl->constant_range[i].last = index;
567 goto out;
568 }
569
570 if (decl->constant_range[i].first == index + 1) {
571 decl->constant_range[i].first = index;
572 goto out;
573 }
574
575 minconst = MIN2(minconst, decl->constant_range[i].first);
576 maxconst = MAX2(maxconst, decl->constant_range[i].last);
577 }
578
579 /* Create new range?
580 */
581 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
582 i = decl->nr_constant_ranges++;
583 decl->constant_range[i].first = index;
584 decl->constant_range[i].last = index;
585 goto out;
586 }
587
588 /* Collapse all ranges down to one:
589 */
590 i = 0;
591 decl->constant_range[0].first = minconst;
592 decl->constant_range[0].last = maxconst;
593 decl->nr_constant_ranges = 1;
594
595 out:
596 assert(i < decl->nr_constant_ranges);
597 assert(decl->constant_range[i].first <= index);
598 assert(decl->constant_range[i].last >= index);
599
600 struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, index);
601 return ureg_src_dimension(src, 0);
602 }
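/* Illustrative sketch only (not part of this file): a caller building a
 * shader would typically do
 *
 *    struct ureg_src c = ureg_DECL_constant(ureg, 0);
 *
 * and use "c" as a source operand; the CONST declarations themselves are
 * emitted later from the ranges recorded above.
 */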
603
604
605 /* Record a range of hw atomic registers.  Keep track of which have been
606 * referred to so that we can emit decls later.
607 */
608 void
609 ureg_DECL_hw_atomic(struct ureg_program *ureg,
610 unsigned first,
611 unsigned last,
612 unsigned buffer_id,
613 unsigned array_id)
614 {
615 struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id];
616
617 if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) {
618 uint i = decl->nr_hw_atomic_ranges++;
619
620 decl->hw_atomic_range[i].first = first;
621 decl->hw_atomic_range[i].last = last;
622 decl->hw_atomic_range[i].array_id = array_id;
623 } else {
624 set_bad(ureg);
625 }
626 }
627
628 static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
629 boolean local )
630 {
631 unsigned i;
632
633 /* Look for a released temporary.
634 */
635 for (i = util_bitmask_get_first_index(ureg->free_temps);
636 i != UTIL_BITMASK_INVALID_INDEX;
637 i = util_bitmask_get_next_index(ureg->free_temps, i + 1)) {
638 if (util_bitmask_get(ureg->local_temps, i) == local)
639 break;
640 }
641
642 /* Or allocate a new one.
643 */
644 if (i == UTIL_BITMASK_INVALID_INDEX) {
645 i = ureg->nr_temps++;
646
647 if (local)
648 util_bitmask_set(ureg->local_temps, i);
649
650 /* Start a new declaration when the local flag changes */
651 if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local)
652 util_bitmask_set(ureg->decl_temps, i);
653 }
654
655 util_bitmask_clear(ureg->free_temps, i);
656
657 return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
658 }
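/* Note (descriptive, added for readability): local_temps records which
 * temporaries were requested as "local", and decl_temps marks the indices
 * where a new DECL TEMP range has to start (wherever the local flag flips,
 * and at array boundaries), so the final declarations can later be emitted
 * as contiguous ranges.
 */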
659
660 struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
661 {
662 return alloc_temporary(ureg, FALSE);
663 }
664
665 struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
666 {
667 return alloc_temporary(ureg, TRUE);
668 }
669
670 struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg,
671 unsigned size,
672 boolean local )
673 {
674 unsigned i = ureg->nr_temps;
675 struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, i );
676
677 if (local)
678 util_bitmask_set(ureg->local_temps, i);
679
680 /* Always start a new declaration at the start of the array */
681 util_bitmask_set(ureg->decl_temps, i);
682
683 ureg->nr_temps += size;
684
685 /* and also at the end of the array */
686 util_bitmask_set(ureg->decl_temps, ureg->nr_temps);
687
688 if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
689 ureg->array_temps[ureg->nr_array_temps++] = i;
690 dst.ArrayID = ureg->nr_array_temps;
691 }
692
693 return dst;
694 }
695
696 void ureg_release_temporary( struct ureg_program *ureg,
697 struct ureg_dst tmp )
698 {
699 if(tmp.File == TGSI_FILE_TEMPORARY)
700 util_bitmask_set(ureg->free_temps, tmp.Index);
701 }
702
703
704 /* Allocate a new address register.
705 */
706 struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
707 {
708 if (ureg->nr_addrs < UREG_MAX_ADDR)
709 return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
710
711 assert( 0 );
712 return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
713 }
714
715 /* Allocate a new sampler.
716 */
717 struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
718 unsigned nr )
719 {
720 unsigned i;
721
722 for (i = 0; i < ureg->nr_samplers; i++)
723 if (ureg->sampler[i].Index == (int)nr)
724 return ureg->sampler[i];
725
726 if (i < PIPE_MAX_SAMPLERS) {
727 ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
728 ureg->nr_samplers++;
729 return ureg->sampler[i];
730 }
731
732 assert( 0 );
733 return ureg->sampler[0];
734 }
735
736 /*
737 * Allocate a new shader sampler view.
738 */
739 struct ureg_src
740 ureg_DECL_sampler_view(struct ureg_program *ureg,
741 unsigned index,
742 enum tgsi_texture_type target,
743 enum tgsi_return_type return_type_x,
744 enum tgsi_return_type return_type_y,
745 enum tgsi_return_type return_type_z,
746 enum tgsi_return_type return_type_w)
747 {
748 struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
749 uint i;
750
751 for (i = 0; i < ureg->nr_sampler_views; i++) {
752 if (ureg->sampler_view[i].index == index) {
753 return reg;
754 }
755 }
756
757 if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) {
758 ureg->sampler_view[i].index = index;
759 ureg->sampler_view[i].target = target;
760 ureg->sampler_view[i].return_type_x = return_type_x;
761 ureg->sampler_view[i].return_type_y = return_type_y;
762 ureg->sampler_view[i].return_type_z = return_type_z;
763 ureg->sampler_view[i].return_type_w = return_type_w;
764 ureg->nr_sampler_views++;
765 return reg;
766 }
767
768 assert(0);
769 return reg;
770 }
771
772 /* Allocate a new image.
773 */
774 struct ureg_src
775 ureg_DECL_image(struct ureg_program *ureg,
776 unsigned index,
777 enum tgsi_texture_type target,
778 enum pipe_format format,
779 boolean wr,
780 boolean raw)
781 {
782 struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index);
783 unsigned i;
784
785 for (i = 0; i < ureg->nr_images; i++)
786 if (ureg->image[i].index == index)
787 return reg;
788
789 if (i < PIPE_MAX_SHADER_IMAGES) {
790 ureg->image[i].index = index;
791 ureg->image[i].target = target;
792 ureg->image[i].wr = wr;
793 ureg->image[i].raw = raw;
794 ureg->image[i].format = format;
795 ureg->nr_images++;
796 return reg;
797 }
798
799 assert(0);
800 return reg;
801 }
802
803 /* Allocate a new buffer.
804 */
805 struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
806 bool atomic)
807 {
808 struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
809 unsigned i;
810
811 for (i = 0; i < ureg->nr_buffers; i++)
812 if (ureg->buffer[i].index == nr)
813 return reg;
814
815 if (i < PIPE_MAX_SHADER_BUFFERS) {
816 ureg->buffer[i].index = nr;
817 ureg->buffer[i].atomic = atomic;
818 ureg->nr_buffers++;
819 return reg;
820 }
821
822 assert(0);
823 return reg;
824 }
825
826 /* Allocate a memory area.
827 */
828 struct ureg_src ureg_DECL_memory(struct ureg_program *ureg,
829 unsigned memory_type)
830 {
831 struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type);
832
833 ureg->use_memory[memory_type] = true;
834 return reg;
835 }
836
837 static int
838 match_or_expand_immediate64( const unsigned *v,
839 unsigned nr,
840 unsigned *v2,
841 unsigned *pnr2,
842 unsigned *swizzle )
843 {
844 unsigned nr2 = *pnr2;
845 unsigned i, j;
846 *swizzle = 0;
847
848 for (i = 0; i < nr; i += 2) {
849 boolean found = FALSE;
850
851 for (j = 0; j < nr2 && !found; j += 2) {
852 if (v[i] == v2[j] && v[i + 1] == v2[j + 1]) {
853 *swizzle |= (j << (i * 2)) | ((j + 1) << ((i + 1) * 2));
854 found = TRUE;
855 }
856 }
857 if (!found) {
858 if ((nr2) >= 4) {
859 return FALSE;
860 }
861
862 v2[nr2] = v[i];
863 v2[nr2 + 1] = v[i + 1];
864
865 *swizzle |= (nr2 << (i * 2)) | ((nr2 + 1) << ((i + 1) * 2));
866 nr2 += 2;
867 }
868 }
869
870 /* Actually expand immediate only when fully succeeded.
871 */
872 *pnr2 = nr2;
873 return TRUE;
874 }
875
876 static int
877 match_or_expand_immediate( const unsigned *v,
878 int type,
879 unsigned nr,
880 unsigned *v2,
881 unsigned *pnr2,
882 unsigned *swizzle )
883 {
884 unsigned nr2 = *pnr2;
885 unsigned i, j;
886
887 if (type == TGSI_IMM_FLOAT64 ||
888 type == TGSI_IMM_UINT64 ||
889 type == TGSI_IMM_INT64)
890 return match_or_expand_immediate64(v, nr, v2, pnr2, swizzle);
891
892 *swizzle = 0;
893
894 for (i = 0; i < nr; i++) {
895 boolean found = FALSE;
896
897 for (j = 0; j < nr2 && !found; j++) {
898 if (v[i] == v2[j]) {
899 *swizzle |= j << (i * 2);
900 found = TRUE;
901 }
902 }
903
904 if (!found) {
905 if (nr2 >= 4) {
906 return FALSE;
907 }
908
909 v2[nr2] = v[i];
910 *swizzle |= nr2 << (i * 2);
911 nr2++;
912 }
913 }
914
915 /* Actually expand immediate only when fully succeeded.
916 */
917 *pnr2 = nr2;
918 return TRUE;
919 }
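/* Note (descriptive, added for readability): the returned "swizzle" holds,
 * for each requested component, a 2-bit index giving where that value ended
 * up inside the (possibly shared) immediate vector; decl_immediate() below
 * uses it to hand back a correctly swizzled IMM source.
 */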
920
921
922 static struct ureg_src
923 decl_immediate( struct ureg_program *ureg,
924 const unsigned *v,
925 unsigned nr,
926 unsigned type )
927 {
928 unsigned i, j;
929 unsigned swizzle = 0;
930
931 /* Could do a first pass where we examine all existing immediates
932 * without expanding.
933 */
934
935 for (i = 0; i < ureg->nr_immediates; i++) {
936 if (ureg->immediate[i].type != type) {
937 continue;
938 }
939 if (match_or_expand_immediate(v,
940 type,
941 nr,
942 ureg->immediate[i].value.u,
943 &ureg->immediate[i].nr,
944 &swizzle)) {
945 goto out;
946 }
947 }
948
949 if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
950 i = ureg->nr_immediates++;
951 ureg->immediate[i].type = type;
952 if (match_or_expand_immediate(v,
953 type,
954 nr,
955 ureg->immediate[i].value.u,
956 &ureg->immediate[i].nr,
957 &swizzle)) {
958 goto out;
959 }
960 }
961
962 set_bad(ureg);
963
964 out:
965 /* Make sure that all referenced elements are from this immediate.
966 * Has the effect of making size-one immediates into scalars.
967 */
968 if (type == TGSI_IMM_FLOAT64 ||
969 type == TGSI_IMM_UINT64 ||
970 type == TGSI_IMM_INT64) {
971 for (j = nr; j < 4; j+=2) {
972 swizzle |= (swizzle & 0xf) << (j * 2);
973 }
974 } else {
975 for (j = nr; j < 4; j++) {
976 swizzle |= (swizzle & 0x3) << (j * 2);
977 }
978 }
979 return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
980 (swizzle >> 0) & 0x3,
981 (swizzle >> 2) & 0x3,
982 (swizzle >> 4) & 0x3,
983 (swizzle >> 6) & 0x3);
984 }
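/* Note (descriptive, added for readability): immediates are deduplicated --
 * decl_immediate() first tries to reuse or grow an existing immediate of the
 * same type, and only allocates a new slot (or flags the program as bad)
 * when that fails.
 */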
985
986
987 struct ureg_src
988 ureg_DECL_immediate( struct ureg_program *ureg,
989 const float *v,
990 unsigned nr )
991 {
992 union {
993 float f[4];
994 unsigned u[4];
995 } fu;
996 unsigned int i;
997
998 for (i = 0; i < nr; i++) {
999 fu.f[i] = v[i];
1000 }
1001
1002 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
1003 }
1004
1005 struct ureg_src
1006 ureg_DECL_immediate_f64( struct ureg_program *ureg,
1007 const double *v,
1008 unsigned nr )
1009 {
1010 union {
1011 unsigned u[4];
1012 double d[2];
1013 } fu;
1014 unsigned int i;
1015
1016 assert((nr / 2) < 3);
1017 for (i = 0; i < nr / 2; i++) {
1018 fu.d[i] = v[i];
1019 }
1020
1021 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT64);
1022 }
1023
1024 struct ureg_src
1025 ureg_DECL_immediate_uint( struct ureg_program *ureg,
1026 const unsigned *v,
1027 unsigned nr )
1028 {
1029 return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
1030 }
1031
1032
1033 struct ureg_src
1034 ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
1035 const unsigned *v,
1036 unsigned nr )
1037 {
1038 uint index;
1039 uint i;
1040
1041 if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
1042 set_bad(ureg);
1043 return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
1044 }
1045
1046 index = ureg->nr_immediates;
1047 ureg->nr_immediates += (nr + 3) / 4;
1048
1049 for (i = index; i < ureg->nr_immediates; i++) {
1050 ureg->immediate[i].type = TGSI_IMM_UINT32;
1051 ureg->immediate[i].nr = nr > 4 ? 4 : nr;
1052 memcpy(ureg->immediate[i].value.u,
1053 &v[(i - index) * 4],
1054 ureg->immediate[i].nr * sizeof(uint));
1055 nr -= 4;
1056 }
1057
1058 return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
1059 }
1060
1061
1062 struct ureg_src
1063 ureg_DECL_immediate_int( struct ureg_program *ureg,
1064 const int *v,
1065 unsigned nr )
1066 {
1067 return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
1068 }
1069
1070 struct ureg_src
1071 ureg_DECL_immediate_uint64( struct ureg_program *ureg,
1072 const uint64_t *v,
1073 unsigned nr )
1074 {
1075 union {
1076 unsigned u[4];
1077 uint64_t u64[2];
1078 } fu;
1079 unsigned int i;
1080
1081 assert((nr / 2) < 3);
1082 for (i = 0; i < nr / 2; i++) {
1083 fu.u64[i] = v[i];
1084 }
1085
1086 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64);
1087 }
1088
1089 struct ureg_src
1090 ureg_DECL_immediate_int64( struct ureg_program *ureg,
1091 const int64_t *v,
1092 unsigned nr )
1093 {
1094 union {
1095 unsigned u[4];
1096 int64_t i64[2];
1097 } fu;
1098 unsigned int i;
1099
1100 assert((nr / 2) < 3);
1101 for (i = 0; i < nr / 2; i++) {
1102 fu.i64[i] = v[i];
1103 }
1104
1105 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64);
1106 }
1107
1108 void
1109 ureg_emit_src( struct ureg_program *ureg,
1110 struct ureg_src src )
1111 {
1112 unsigned size = 1 + (src.Indirect ? 1 : 0) +
1113 (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0);
1114
1115 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
1116 unsigned n = 0;
1117
1118 assert(src.File != TGSI_FILE_NULL);
1119 assert(src.File < TGSI_FILE_COUNT);
1120
1121 out[n].value = 0;
1122 out[n].src.File = src.File;
1123 out[n].src.SwizzleX = src.SwizzleX;
1124 out[n].src.SwizzleY = src.SwizzleY;
1125 out[n].src.SwizzleZ = src.SwizzleZ;
1126 out[n].src.SwizzleW = src.SwizzleW;
1127 out[n].src.Index = src.Index;
1128 out[n].src.Negate = src.Negate;
1129 out[0].src.Absolute = src.Absolute;
1130 n++;
1131
1132 if (src.Indirect) {
1133 out[0].src.Indirect = 1;
1134 out[n].value = 0;
1135 out[n].ind.File = src.IndirectFile;
1136 out[n].ind.Swizzle = src.IndirectSwizzle;
1137 out[n].ind.Index = src.IndirectIndex;
1138 if (!ureg->supports_any_inout_decl_range &&
1139 (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
1140 out[n].ind.ArrayID = 0;
1141 else
1142 out[n].ind.ArrayID = src.ArrayID;
1143 n++;
1144 }
1145
1146 if (src.Dimension) {
1147 out[0].src.Dimension = 1;
1148 out[n].dim.Dimension = 0;
1149 out[n].dim.Padding = 0;
1150 if (src.DimIndirect) {
1151 out[n].dim.Indirect = 1;
1152 out[n].dim.Index = src.DimensionIndex;
1153 n++;
1154 out[n].value = 0;
1155 out[n].ind.File = src.DimIndFile;
1156 out[n].ind.Swizzle = src.DimIndSwizzle;
1157 out[n].ind.Index = src.DimIndIndex;
1158 if (!ureg->supports_any_inout_decl_range &&
1159 (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
1160 out[n].ind.ArrayID = 0;
1161 else
1162 out[n].ind.ArrayID = src.ArrayID;
1163 } else {
1164 out[n].dim.Indirect = 0;
1165 out[n].dim.Index = src.DimensionIndex;
1166 }
1167 n++;
1168 }
1169
1170 assert(n == size);
1171 }
1172
1173
1174 void
1175 ureg_emit_dst( struct ureg_program *ureg,
1176 struct ureg_dst dst )
1177 {
1178 unsigned size = 1 + (dst.Indirect ? 1 : 0) +
1179 (dst.Dimension ? (dst.DimIndirect ? 2 : 1) : 0);
1180
1181 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
1182 unsigned n = 0;
1183
1184 assert(dst.File != TGSI_FILE_NULL);
1185 assert(dst.File != TGSI_FILE_SAMPLER);
1186 assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
1187 assert(dst.File != TGSI_FILE_IMMEDIATE);
1188 assert(dst.File < TGSI_FILE_COUNT);
1189
1190 out[n].value = 0;
1191 out[n].dst.File = dst.File;
1192 out[n].dst.WriteMask = dst.WriteMask;
1193 out[n].dst.Indirect = dst.Indirect;
1194 out[n].dst.Index = dst.Index;
1195 n++;
1196
1197 if (dst.Indirect) {
1198 out[n].value = 0;
1199 out[n].ind.File = dst.IndirectFile;
1200 out[n].ind.Swizzle = dst.IndirectSwizzle;
1201 out[n].ind.Index = dst.IndirectIndex;
1202 if (!ureg->supports_any_inout_decl_range &&
1203 (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
1204 out[n].ind.ArrayID = 0;
1205 else
1206 out[n].ind.ArrayID = dst.ArrayID;
1207 n++;
1208 }
1209
1210 if (dst.Dimension) {
1211 out[0].dst.Dimension = 1;
1212 out[n].dim.Dimension = 0;
1213 out[n].dim.Padding = 0;
1214 if (dst.DimIndirect) {
1215 out[n].dim.Indirect = 1;
1216 out[n].dim.Index = dst.DimensionIndex;
1217 n++;
1218 out[n].value = 0;
1219 out[n].ind.File = dst.DimIndFile;
1220 out[n].ind.Swizzle = dst.DimIndSwizzle;
1221 out[n].ind.Index = dst.DimIndIndex;
1222 if (!ureg->supports_any_inout_decl_range &&
1223 (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
1224 out[n].ind.ArrayID = 0;
1225 else
1226 out[n].ind.ArrayID = dst.ArrayID;
1227 } else {
1228 out[n].dim.Indirect = 0;
1229 out[n].dim.Index = dst.DimensionIndex;
1230 }
1231 n++;
1232 }
1233
1234 assert(n == size);
1235 }
1236
1237
1238 static void validate( enum tgsi_opcode opcode,
1239 unsigned nr_dst,
1240 unsigned nr_src )
1241 {
1242 #ifndef NDEBUG
1243 const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
1244 assert(info);
1245 if (info) {
1246 assert(nr_dst == info->num_dst);
1247 assert(nr_src == info->num_src);
1248 }
1249 #endif
1250 }
1251
1252 struct ureg_emit_insn_result
1253 ureg_emit_insn(struct ureg_program *ureg,
1254 enum tgsi_opcode opcode,
1255 boolean saturate,
1256 unsigned precise,
1257 unsigned num_dst,
1258 unsigned num_src)
1259 {
1260 union tgsi_any_token *out;
1261 uint count = 1;
1262 struct ureg_emit_insn_result result;
1263
1264 validate( opcode, num_dst, num_src );
1265
1266 out = get_tokens( ureg, DOMAIN_INSN, count );
1267 out[0].insn = tgsi_default_instruction();
1268 out[0].insn.Opcode = opcode;
1269 out[0].insn.Saturate = saturate;
1270 out[0].insn.Precise = precise;
1271 out[0].insn.NumDstRegs = num_dst;
1272 out[0].insn.NumSrcRegs = num_src;
1273
1274 result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
1275 result.extended_token = result.insn_token;
1276
1277 ureg->nr_instructions++;
1278
1279 return result;
1280 }
1281
1282
1283 /**
1284 * Emit a label token.
1285 * \param label_token returns a token number indicating where the label
1286 * needs to be patched later. Later, this value should be passed to the
1287 * ureg_fixup_label() function.
1288 */
1289 void
1290 ureg_emit_label(struct ureg_program *ureg,
1291 unsigned extended_token,
1292 unsigned *label_token )
1293 {
1294 union tgsi_any_token *out, *insn;
1295
1296 if (!label_token)
1297 return;
1298
1299 out = get_tokens( ureg, DOMAIN_INSN, 1 );
1300 out[0].value = 0;
1301
1302 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1303 insn->insn.Label = 1;
1304
1305 *label_token = ureg->domain[DOMAIN_INSN].count - 1;
1306 }
1307
1308 /* Will return a number which can be used in a label to point to the
1309 * next instruction to be emitted.
1310 */
1311 unsigned
1312 ureg_get_instruction_number( struct ureg_program *ureg )
1313 {
1314 return ureg->nr_instructions;
1315 }
1316
1317 /* Patch a given label (expressed as a token number) to point to a
1318 * given instruction (expressed as an instruction number).
1319 */
1320 void
1321 ureg_fixup_label(struct ureg_program *ureg,
1322 unsigned label_token,
1323 unsigned instruction_number )
1324 {
1325 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
1326
1327 out->insn_label.Label = instruction_number;
1328 }
1329
1330
1331 void
1332 ureg_emit_texture(struct ureg_program *ureg,
1333 unsigned extended_token,
1334 enum tgsi_texture_type target,
1335 enum tgsi_return_type return_type, unsigned num_offsets)
1336 {
1337 union tgsi_any_token *out, *insn;
1338
1339 out = get_tokens( ureg, DOMAIN_INSN, 1 );
1340 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1341
1342 insn->insn.Texture = 1;
1343
1344 out[0].value = 0;
1345 out[0].insn_texture.Texture = target;
1346 out[0].insn_texture.NumOffsets = num_offsets;
1347 out[0].insn_texture.ReturnType = return_type;
1348 }
1349
1350 void
1351 ureg_emit_texture_offset(struct ureg_program *ureg,
1352 const struct tgsi_texture_offset *offset)
1353 {
1354 union tgsi_any_token *out;
1355
1356 out = get_tokens( ureg, DOMAIN_INSN, 1);
1357
1358 out[0].value = 0;
1359 out[0].insn_texture_offset = *offset;
1360 }
1361
1362 void
1363 ureg_emit_memory(struct ureg_program *ureg,
1364 unsigned extended_token,
1365 unsigned qualifier,
1366 enum tgsi_texture_type texture,
1367 enum pipe_format format)
1368 {
1369 union tgsi_any_token *out, *insn;
1370
1371 out = get_tokens( ureg, DOMAIN_INSN, 1 );
1372 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1373
1374 insn->insn.Memory = 1;
1375
1376 out[0].value = 0;
1377 out[0].insn_memory.Qualifier = qualifier;
1378 out[0].insn_memory.Texture = texture;
1379 out[0].insn_memory.Format = format;
1380 }
1381
1382 void
1383 ureg_fixup_insn_size(struct ureg_program *ureg,
1384 unsigned insn )
1385 {
1386 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
1387
1388 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
1389 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
1390 }
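/* Note (descriptive, added for readability): the emission pattern used by
 * the helpers below is: ureg_emit_insn() reserves the instruction token,
 * the operand tokens are appended with ureg_emit_dst()/ureg_emit_src()
 * (plus optional texture/memory extension tokens), and
 * ureg_fixup_insn_size() then patches NrTokens once the total is known.
 */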
1391
1392
1393 void
1394 ureg_insn(struct ureg_program *ureg,
1395 enum tgsi_opcode opcode,
1396 const struct ureg_dst *dst,
1397 unsigned nr_dst,
1398 const struct ureg_src *src,
1399 unsigned nr_src,
1400 unsigned precise )
1401 {
1402 struct ureg_emit_insn_result insn;
1403 unsigned i;
1404 boolean saturate;
1405
1406 if (nr_dst && ureg_dst_is_empty(dst[0])) {
1407 return;
1408 }
1409
1410 saturate = nr_dst ? dst[0].Saturate : FALSE;
1411
1412 insn = ureg_emit_insn(ureg,
1413 opcode,
1414 saturate,
1415 precise,
1416 nr_dst,
1417 nr_src);
1418
1419 for (i = 0; i < nr_dst; i++)
1420 ureg_emit_dst( ureg, dst[i] );
1421
1422 for (i = 0; i < nr_src; i++)
1423 ureg_emit_src( ureg, src[i] );
1424
1425 ureg_fixup_insn_size( ureg, insn.insn_token );
1426 }
1427
1428 void
1429 ureg_tex_insn(struct ureg_program *ureg,
1430 enum tgsi_opcode opcode,
1431 const struct ureg_dst *dst,
1432 unsigned nr_dst,
1433 enum tgsi_texture_type target,
1434 enum tgsi_return_type return_type,
1435 const struct tgsi_texture_offset *texoffsets,
1436 unsigned nr_offset,
1437 const struct ureg_src *src,
1438 unsigned nr_src )
1439 {
1440 struct ureg_emit_insn_result insn;
1441 unsigned i;
1442 boolean saturate;
1443
1444 if (nr_dst && ureg_dst_is_empty(dst[0])) {
1445 return;
1446 }
1447
1448 saturate = nr_dst ? dst[0].Saturate : FALSE;
1449
1450 insn = ureg_emit_insn(ureg,
1451 opcode,
1452 saturate,
1453 0,
1454 nr_dst,
1455 nr_src);
1456
1457 ureg_emit_texture( ureg, insn.extended_token, target, return_type,
1458 nr_offset );
1459
1460 for (i = 0; i < nr_offset; i++)
1461 ureg_emit_texture_offset( ureg, &texoffsets[i]);
1462
1463 for (i = 0; i < nr_dst; i++)
1464 ureg_emit_dst( ureg, dst[i] );
1465
1466 for (i = 0; i < nr_src; i++)
1467 ureg_emit_src( ureg, src[i] );
1468
1469 ureg_fixup_insn_size( ureg, insn.insn_token );
1470 }
1471
1472
1473 void
1474 ureg_memory_insn(struct ureg_program *ureg,
1475 enum tgsi_opcode opcode,
1476 const struct ureg_dst *dst,
1477 unsigned nr_dst,
1478 const struct ureg_src *src,
1479 unsigned nr_src,
1480 unsigned qualifier,
1481 enum tgsi_texture_type texture,
1482 enum pipe_format format)
1483 {
1484 struct ureg_emit_insn_result insn;
1485 unsigned i;
1486
1487 insn = ureg_emit_insn(ureg,
1488 opcode,
1489 FALSE,
1490 0,
1491 nr_dst,
1492 nr_src);
1493
1494 ureg_emit_memory(ureg, insn.extended_token, qualifier, texture, format);
1495
1496 for (i = 0; i < nr_dst; i++)
1497 ureg_emit_dst(ureg, dst[i]);
1498
1499 for (i = 0; i < nr_src; i++)
1500 ureg_emit_src(ureg, src[i]);
1501
1502 ureg_fixup_insn_size(ureg, insn.insn_token);
1503 }
1504
1505
1506 static void
1507 emit_decl_semantic(struct ureg_program *ureg,
1508 unsigned file,
1509 unsigned first,
1510 unsigned last,
1511 enum tgsi_semantic semantic_name,
1512 unsigned semantic_index,
1513 unsigned streams,
1514 unsigned usage_mask,
1515 unsigned array_id,
1516 boolean invariant)
1517 {
1518 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
1519
1520 out[0].value = 0;
1521 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1522 out[0].decl.NrTokens = 3;
1523 out[0].decl.File = file;
1524 out[0].decl.UsageMask = usage_mask;
1525 out[0].decl.Semantic = 1;
1526 out[0].decl.Array = array_id != 0;
1527 out[0].decl.Invariant = invariant;
1528
1529 out[1].value = 0;
1530 out[1].decl_range.First = first;
1531 out[1].decl_range.Last = last;
1532
1533 out[2].value = 0;
1534 out[2].decl_semantic.Name = semantic_name;
1535 out[2].decl_semantic.Index = semantic_index;
1536 out[2].decl_semantic.StreamX = streams & 3;
1537 out[2].decl_semantic.StreamY = (streams >> 2) & 3;
1538 out[2].decl_semantic.StreamZ = (streams >> 4) & 3;
1539 out[2].decl_semantic.StreamW = (streams >> 6) & 3;
1540
1541 if (array_id) {
1542 out[3].value = 0;
1543 out[3].array.ArrayID = array_id;
1544 }
1545 }
1546
1547 static void
1548 emit_decl_atomic_2d(struct ureg_program *ureg,
1549 unsigned first,
1550 unsigned last,
1551 unsigned index2D,
1552 unsigned array_id)
1553 {
1554 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
1555
1556 out[0].value = 0;
1557 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1558 out[0].decl.NrTokens = 3;
1559 out[0].decl.File = TGSI_FILE_HW_ATOMIC;
1560 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1561 out[0].decl.Dimension = 1;
1562 out[0].decl.Array = array_id != 0;
1563
1564 out[1].value = 0;
1565 out[1].decl_range.First = first;
1566 out[1].decl_range.Last = last;
1567
1568 out[2].value = 0;
1569 out[2].decl_dim.Index2D = index2D;
1570
1571 if (array_id) {
1572 out[3].value = 0;
1573 out[3].array.ArrayID = array_id;
1574 }
1575 }
1576
1577 static void
1578 emit_decl_fs(struct ureg_program *ureg,
1579 unsigned file,
1580 unsigned first,
1581 unsigned last,
1582 enum tgsi_semantic semantic_name,
1583 unsigned semantic_index,
1584 enum tgsi_interpolate_mode interpolate,
1585 enum tgsi_interpolate_loc interpolate_location,
1586 unsigned array_id,
1587 unsigned usage_mask)
1588 {
1589 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL,
1590 array_id ? 5 : 4);
1591
1592 out[0].value = 0;
1593 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1594 out[0].decl.NrTokens = 4;
1595 out[0].decl.File = file;
1596 out[0].decl.UsageMask = usage_mask;
1597 out[0].decl.Interpolate = 1;
1598 out[0].decl.Semantic = 1;
1599 out[0].decl.Array = array_id != 0;
1600
1601 out[1].value = 0;
1602 out[1].decl_range.First = first;
1603 out[1].decl_range.Last = last;
1604
1605 out[2].value = 0;
1606 out[2].decl_interp.Interpolate = interpolate;
1607 out[2].decl_interp.Location = interpolate_location;
1608
1609 out[3].value = 0;
1610 out[3].decl_semantic.Name = semantic_name;
1611 out[3].decl_semantic.Index = semantic_index;
1612
1613 if (array_id) {
1614 out[4].value = 0;
1615 out[4].array.ArrayID = array_id;
1616 }
1617 }
1618
1619 static void
1620 emit_decl_temps( struct ureg_program *ureg,
1621 unsigned first, unsigned last,
1622 boolean local,
1623 unsigned arrayid )
1624 {
1625 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL,
1626 arrayid ? 3 : 2 );
1627
1628 out[0].value = 0;
1629 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1630 out[0].decl.NrTokens = 2;
1631 out[0].decl.File = TGSI_FILE_TEMPORARY;
1632 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1633 out[0].decl.Local = local;
1634
1635 out[1].value = 0;
1636 out[1].decl_range.First = first;
1637 out[1].decl_range.Last = last;
1638
1639 if (arrayid) {
1640 out[0].decl.Array = 1;
1641 out[2].value = 0;
1642 out[2].array.ArrayID = arrayid;
1643 }
1644 }
1645
1646 static void emit_decl_range( struct ureg_program *ureg,
1647 unsigned file,
1648 unsigned first,
1649 unsigned count )
1650 {
1651 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1652
1653 out[0].value = 0;
1654 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1655 out[0].decl.NrTokens = 2;
1656 out[0].decl.File = file;
1657 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1658 out[0].decl.Semantic = 0;
1659
1660 out[1].value = 0;
1661 out[1].decl_range.First = first;
1662 out[1].decl_range.Last = first + count - 1;
1663 }
1664
1665 static void
1666 emit_decl_range2D(struct ureg_program *ureg,
1667 unsigned file,
1668 unsigned first,
1669 unsigned last,
1670 unsigned index2D)
1671 {
1672 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1673
1674 out[0].value = 0;
1675 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1676 out[0].decl.NrTokens = 3;
1677 out[0].decl.File = file;
1678 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1679 out[0].decl.Dimension = 1;
1680
1681 out[1].value = 0;
1682 out[1].decl_range.First = first;
1683 out[1].decl_range.Last = last;
1684
1685 out[2].value = 0;
1686 out[2].decl_dim.Index2D = index2D;
1687 }
1688
1689 static void
1690 emit_decl_sampler_view(struct ureg_program *ureg,
1691 unsigned index,
1692 enum tgsi_texture_type target,
1693 enum tgsi_return_type return_type_x,
1694 enum tgsi_return_type return_type_y,
1695 enum tgsi_return_type return_type_z,
1696 enum tgsi_return_type return_type_w )
1697 {
1698 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1699
1700 out[0].value = 0;
1701 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1702 out[0].decl.NrTokens = 3;
1703 out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
1704 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1705
1706 out[1].value = 0;
1707 out[1].decl_range.First = index;
1708 out[1].decl_range.Last = index;
1709
1710 out[2].value = 0;
1711 out[2].decl_sampler_view.Resource = target;
1712 out[2].decl_sampler_view.ReturnTypeX = return_type_x;
1713 out[2].decl_sampler_view.ReturnTypeY = return_type_y;
1714 out[2].decl_sampler_view.ReturnTypeZ = return_type_z;
1715 out[2].decl_sampler_view.ReturnTypeW = return_type_w;
1716 }
1717
1718 static void
1719 emit_decl_image(struct ureg_program *ureg,
1720 unsigned index,
1721 enum tgsi_texture_type target,
1722 enum pipe_format format,
1723 boolean wr,
1724 boolean raw)
1725 {
1726 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1727
1728 out[0].value = 0;
1729 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1730 out[0].decl.NrTokens = 3;
1731 out[0].decl.File = TGSI_FILE_IMAGE;
1732 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1733
1734 out[1].value = 0;
1735 out[1].decl_range.First = index;
1736 out[1].decl_range.Last = index;
1737
1738 out[2].value = 0;
1739 out[2].decl_image.Resource = target;
1740 out[2].decl_image.Writable = wr;
1741 out[2].decl_image.Raw = raw;
1742 out[2].decl_image.Format = format;
1743 }
1744
1745 static void
1746 emit_decl_buffer(struct ureg_program *ureg,
1747 unsigned index,
1748 bool atomic)
1749 {
1750 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1751
1752 out[0].value = 0;
1753 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1754 out[0].decl.NrTokens = 2;
1755 out[0].decl.File = TGSI_FILE_BUFFER;
1756 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1757 out[0].decl.Atomic = atomic;
1758
1759 out[1].value = 0;
1760 out[1].decl_range.First = index;
1761 out[1].decl_range.Last = index;
1762 }
1763
1764 static void
1765 emit_decl_memory(struct ureg_program *ureg, unsigned memory_type)
1766 {
1767 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1768
1769 out[0].value = 0;
1770 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1771 out[0].decl.NrTokens = 2;
1772 out[0].decl.File = TGSI_FILE_MEMORY;
1773 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1774 out[0].decl.MemType = memory_type;
1775
1776 out[1].value = 0;
1777 out[1].decl_range.First = memory_type;
1778 out[1].decl_range.Last = memory_type;
1779 }
1780
1781 static void
1782 emit_immediate( struct ureg_program *ureg,
1783 const unsigned *v,
1784 unsigned type )
1785 {
1786 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
1787
1788 out[0].value = 0;
1789 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
1790 out[0].imm.NrTokens = 5;
1791 out[0].imm.DataType = type;
1792 out[0].imm.Padding = 0;
1793
1794 out[1].imm_data.Uint = v[0];
1795 out[2].imm_data.Uint = v[1];
1796 out[3].imm_data.Uint = v[2];
1797 out[4].imm_data.Uint = v[3];
1798 }
1799
1800 static void
1801 emit_property(struct ureg_program *ureg,
1802 unsigned name,
1803 unsigned data)
1804 {
1805 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1806
1807 out[0].value = 0;
1808 out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY;
1809 out[0].prop.NrTokens = 2;
1810 out[0].prop.PropertyName = name;
1811
1812 out[1].prop_data.Data = data;
1813 }
1814
1815 static int
1816 input_sort(const void *in_a, const void *in_b)
1817 {
1818 const struct ureg_input_decl *a = in_a, *b = in_b;
1819
1820 return a->first - b->first;
1821 }
1822
1823 static int
1824 output_sort(const void *in_a, const void *in_b)
1825 {
1826 const struct ureg_output_decl *a = in_a, *b = in_b;
1827
1828 return a->first - b->first;
1829 }
1830
1831 static void emit_decls( struct ureg_program *ureg )
1832 {
1833 unsigned i,j;
1834
1835 for (i = 0; i < ARRAY_SIZE(ureg->properties); i++)
1836 if (ureg->properties[i] != ~0u)
1837 emit_property(ureg, i, ureg->properties[i]);
1838
1839 /* While not required by TGSI spec, virglrenderer has a dependency on the
1840 * inputs being sorted.
1841 */
1842 qsort(ureg->input, ureg->nr_inputs, sizeof(ureg->input[0]), input_sort);
1843
1844 if (ureg->processor == PIPE_SHADER_VERTEX) {
1845 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
1846 if (ureg->vs_inputs[i/32] & (1u << (i%32))) {
1847 emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
1848 }
1849 }
1850 } else if (ureg->processor == PIPE_SHADER_FRAGMENT) {
1851 if (ureg->supports_any_inout_decl_range) {
1852 for (i = 0; i < ureg->nr_inputs; i++) {
1853 emit_decl_fs(ureg,
1854 TGSI_FILE_INPUT,
1855 ureg->input[i].first,
1856 ureg->input[i].last,
1857 ureg->input[i].semantic_name,
1858 ureg->input[i].semantic_index,
1859 ureg->input[i].interp,
1860 ureg->input[i].interp_location,
1861 ureg->input[i].array_id,
1862 ureg->input[i].usage_mask);
1863 }
1864 }
1865 else {
1866 for (i = 0; i < ureg->nr_inputs; i++) {
1867 for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
1868 emit_decl_fs(ureg,
1869 TGSI_FILE_INPUT,
1870 j, j,
1871 ureg->input[i].semantic_name,
1872 ureg->input[i].semantic_index +
1873 (j - ureg->input[i].first),
1874 ureg->input[i].interp,
1875 ureg->input[i].interp_location, 0,
1876 ureg->input[i].usage_mask);
1877 }
1878 }
1879 }
1880 } else {
1881 if (ureg->supports_any_inout_decl_range) {
1882 for (i = 0; i < ureg->nr_inputs; i++) {
1883 emit_decl_semantic(ureg,
1884 TGSI_FILE_INPUT,
1885 ureg->input[i].first,
1886 ureg->input[i].last,
1887 ureg->input[i].semantic_name,
1888 ureg->input[i].semantic_index,
1889 0,
1890 TGSI_WRITEMASK_XYZW,
1891 ureg->input[i].array_id,
1892 FALSE);
1893 }
1894 }
1895 else {
1896 for (i = 0; i < ureg->nr_inputs; i++) {
1897 for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
1898 emit_decl_semantic(ureg,
1899 TGSI_FILE_INPUT,
1900 j, j,
1901 ureg->input[i].semantic_name,
1902 ureg->input[i].semantic_index +
1903 (j - ureg->input[i].first),
1904 0,
1905 TGSI_WRITEMASK_XYZW, 0, FALSE);
1906 }
1907 }
1908 }
1909 }
1910
1911 for (i = 0; i < ureg->nr_system_values; i++) {
1912 emit_decl_semantic(ureg,
1913 TGSI_FILE_SYSTEM_VALUE,
1914 i,
1915 i,
1916 ureg->system_value[i].semantic_name,
1917 ureg->system_value[i].semantic_index,
1918 0,
1919 TGSI_WRITEMASK_XYZW, 0, FALSE);
1920 }
1921
1922 /* While not required by TGSI spec, virglrenderer has a dependency on the
1923 * outputs being sorted.
1924 */
1925 qsort(ureg->output, ureg->nr_outputs, sizeof(ureg->output[0]), output_sort);
1926
1927 if (ureg->supports_any_inout_decl_range) {
1928 for (i = 0; i < ureg->nr_outputs; i++) {
1929 emit_decl_semantic(ureg,
1930 TGSI_FILE_OUTPUT,
1931 ureg->output[i].first,
1932 ureg->output[i].last,
1933 ureg->output[i].semantic_name,
1934 ureg->output[i].semantic_index,
1935 ureg->output[i].streams,
1936 ureg->output[i].usage_mask,
1937 ureg->output[i].array_id,
1938 ureg->output[i].invariant);
1939 }
1940 }
1941 else {
1942 for (i = 0; i < ureg->nr_outputs; i++) {
1943 for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) {
1944 emit_decl_semantic(ureg,
1945 TGSI_FILE_OUTPUT,
1946 j, j,
1947 ureg->output[i].semantic_name,
1948 ureg->output[i].semantic_index +
1949 (j - ureg->output[i].first),
1950 ureg->output[i].streams,
1951 ureg->output[i].usage_mask,
1952 0,
1953 ureg->output[i].invariant);
1954 }
1955 }
1956 }
1957
1958 for (i = 0; i < ureg->nr_samplers; i++) {
1959 emit_decl_range( ureg,
1960 TGSI_FILE_SAMPLER,
1961 ureg->sampler[i].Index, 1 );
1962 }
1963
1964 for (i = 0; i < ureg->nr_sampler_views; i++) {
1965 emit_decl_sampler_view(ureg,
1966 ureg->sampler_view[i].index,
1967 ureg->sampler_view[i].target,
1968 ureg->sampler_view[i].return_type_x,
1969 ureg->sampler_view[i].return_type_y,
1970 ureg->sampler_view[i].return_type_z,
1971 ureg->sampler_view[i].return_type_w);
1972 }
1973
1974 for (i = 0; i < ureg->nr_images; i++) {
1975 emit_decl_image(ureg,
1976 ureg->image[i].index,
1977 ureg->image[i].target,
1978 ureg->image[i].format,
1979 ureg->image[i].wr,
1980 ureg->image[i].raw);
1981 }
1982
1983 for (i = 0; i < ureg->nr_buffers; i++) {
1984 emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
1985 }
1986
1987 for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) {
1988 if (ureg->use_memory[i])
1989 emit_decl_memory(ureg, i);
1990 }
1991
1992 for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
1993 struct const_decl *decl = &ureg->const_decls[i];
1994
1995 if (decl->nr_constant_ranges) {
1996 uint j;
1997
1998 for (j = 0; j < decl->nr_constant_ranges; j++) {
1999 emit_decl_range2D(ureg,
2000 TGSI_FILE_CONSTANT,
2001 decl->constant_range[j].first,
2002 decl->constant_range[j].last,
2003 i);
2004 }
2005 }
2006 }
2007
2008 for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
2009 struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i];
2010
2011 if (decl->nr_hw_atomic_ranges) {
2012 uint j;
2013
2014 for (j = 0; j < decl->nr_hw_atomic_ranges; j++) {
2015 emit_decl_atomic_2d(ureg,
2016 decl->hw_atomic_range[j].first,
2017 decl->hw_atomic_range[j].last,
2018 i,
2019 decl->hw_atomic_range[j].array_id);
2020 }
2021 }
2022 }
2023
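   /* Declare temporaries: split the register range into runs at the
    * boundaries recorded in decl_temps; each run carries its "local" flag
    * and, when it starts an array temporary, a 1-based array id.
    */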
   if (ureg->nr_temps) {
      unsigned array = 0;
      for (i = 0; i < ureg->nr_temps;) {
         boolean local = util_bitmask_get(ureg->local_temps, i);
         unsigned first = i;
         i = util_bitmask_get_next_index(ureg->decl_temps, i + 1);
         if (i == UTIL_BITMASK_INVALID_INDEX)
            i = ureg->nr_temps;

         if (array < ureg->nr_array_temps && ureg->array_temps[array] == first)
            emit_decl_temps( ureg, first, i - 1, local, ++array );
         else
            emit_decl_temps( ureg, first, i - 1, local, 0 );
      }
   }

   if (ureg->nr_addrs) {
      emit_decl_range( ureg,
                       TGSI_FILE_ADDRESS,
                       0, ureg->nr_addrs );
   }

   for (i = 0; i < ureg->nr_immediates; i++) {
      emit_immediate( ureg,
                      ureg->immediate[i].value.u,
                      ureg->immediate[i].type );
   }
}

/* Append the instruction tokens onto the declarations to build a
 * contiguous stream suitable to send to the driver.
 */
static void copy_instructions( struct ureg_program *ureg )
{
   unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
   union tgsi_any_token *out = get_tokens( ureg,
                                           DOMAIN_DECL,
                                           nr_tokens );

   memcpy(out,
          ureg->domain[DOMAIN_INSN].tokens,
          nr_tokens * sizeof out[0] );
}


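/* Patch the header's BodySize now that all declarations and instructions
 * have been copied into the DOMAIN_DECL stream; the two header tokens
 * themselves are not counted.
 */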
static void
fixup_header_size(struct ureg_program *ureg)
{
   union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 );

   out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2;
}


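/* Emit the two-token TGSI header (header + processor token) at the start
 * of the declaration stream; BodySize is fixed up later.
 */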
static void
emit_header( struct ureg_program *ureg )
{
   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );

   out[0].header.HeaderSize = 2;
   out[0].header.BodySize = 0;

   out[1].processor.Processor = ureg->processor;
   out[1].processor.Padding = 0;
}


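/* Finalize the program: default the NEXT_SHADER property for VS/TES if it
 * was not set explicitly, emit header, declarations and instructions into
 * one contiguous token stream, and return a pointer to it.  Returns NULL
 * (and asserts in debug builds) if token allocation failed.
 */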
const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
{
   const struct tgsi_token *tokens;

   switch (ureg->processor) {
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_TESS_EVAL:
      ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER,
                    ureg->next_shader_processor == -1 ?
                    PIPE_SHADER_FRAGMENT :
                    ureg->next_shader_processor);
      break;
   default:
      ; /* nothing */
   }

   emit_header( ureg );
   emit_decls( ureg );
   copy_instructions( ureg );
   fixup_header_size( ureg );

   if (ureg->domain[0].tokens == error_tokens ||
       ureg->domain[1].tokens == error_tokens) {
      debug_printf("%s: error in generated shader\n", __FUNCTION__);
      assert(0);
      return NULL;
   }

   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;

   if (0) {
      debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
                   ureg->domain[DOMAIN_DECL].count);
      tgsi_dump( tokens, 0 );
   }

#if DEBUG
   /* tgsi_sanity doesn't seem to return if there are too many constants. */
   bool too_many_constants = false;
   for (unsigned i = 0; i < ARRAY_SIZE(ureg->const_decls); i++) {
      for (unsigned j = 0; j < ureg->const_decls[i].nr_constant_ranges; j++) {
         if (ureg->const_decls[i].constant_range[j].last > 4096) {
            too_many_constants = true;
            break;
         }
      }
   }

   if (tokens && !too_many_constants && !tgsi_sanity_check(tokens)) {
      debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
      tgsi_dump(tokens, 0);
      assert(0);
   }
#endif

   return tokens;
}


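/* Finalize the ureg program and create a pipe CSO of the matching shader
 * stage from it.  Typical usage is roughly the following sketch (the
 * ureg_DECL_* / ureg_MOV / ureg_END helpers come from the public ureg API
 * declared in tgsi_ureg.h, not from this file, and "pipe" is the caller's
 * pipe_context):
 *
 *    struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
 *    struct ureg_src color =
 *       ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0,
 *                          TGSI_INTERPOLATE_PERSPECTIVE);
 *    struct ureg_dst out = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
 *    ureg_MOV(ureg, out, color);
 *    ureg_END(ureg);
 *    void *fs = ureg_create_shader(ureg, pipe, NULL);
 *    ureg_destroy(ureg);
 */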
void *ureg_create_shader( struct ureg_program *ureg,
                          struct pipe_context *pipe,
                          const struct pipe_stream_output_info *so )
{
   struct pipe_shader_state state = {0};

   pipe_shader_state_from_tgsi(&state, ureg_finalize(ureg));
   if (!state.tokens)
      return NULL;

   if (so)
      state.stream_output = *so;

   switch (ureg->processor) {
   case PIPE_SHADER_VERTEX:
      return pipe->create_vs_state(pipe, &state);
   case PIPE_SHADER_TESS_CTRL:
      return pipe->create_tcs_state(pipe, &state);
   case PIPE_SHADER_TESS_EVAL:
      return pipe->create_tes_state(pipe, &state);
   case PIPE_SHADER_GEOMETRY:
      return pipe->create_gs_state(pipe, &state);
   case PIPE_SHADER_FRAGMENT:
      return pipe->create_fs_state(pipe, &state);
   default:
      return NULL;
   }
}


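/* Finalize the program and hand ownership of the token array to the caller:
 * the DOMAIN_DECL storage is detached from the ureg program, so the caller
 * must release it with ureg_free_tokens().
 */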
const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
                                          unsigned *nr_tokens )
{
   const struct tgsi_token *tokens;

   ureg_finalize(ureg);

   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;

   if (nr_tokens)
      *nr_tokens = ureg->domain[DOMAIN_DECL].count;

   ureg->domain[DOMAIN_DECL].tokens = 0;
   ureg->domain[DOMAIN_DECL].size = 0;
   ureg->domain[DOMAIN_DECL].order = 0;
   ureg->domain[DOMAIN_DECL].count = 0;

   return tokens;
}


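/* Release a token array previously detached with ureg_get_tokens(). */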
void ureg_free_tokens( const struct tgsi_token *tokens )
{
   FREE((struct tgsi_token *)tokens);
}


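/* Create a ureg program for the given shader stage.  The screen variant
 * additionally queries PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE so that
 * ranged input/output declarations are only emitted when the driver
 * supports them.
 */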
struct ureg_program *
ureg_create(enum pipe_shader_type processor)
{
   return ureg_create_with_screen(processor, NULL);
}


struct ureg_program *
ureg_create_with_screen(enum pipe_shader_type processor,
                        struct pipe_screen *screen)
{
   uint i;
   struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
   if (!ureg)
      goto no_ureg;

   ureg->processor = processor;
   ureg->supports_any_inout_decl_range =
      screen &&
      screen->get_shader_param(screen, processor,
                               PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
   ureg->next_shader_processor = -1;

   for (i = 0; i < ARRAY_SIZE(ureg->properties); i++)
      ureg->properties[i] = ~0;

   ureg->free_temps = util_bitmask_create();
   if (ureg->free_temps == NULL)
      goto no_free_temps;

   ureg->local_temps = util_bitmask_create();
   if (ureg->local_temps == NULL)
      goto no_local_temps;

   ureg->decl_temps = util_bitmask_create();
   if (ureg->decl_temps == NULL)
      goto no_decl_temps;

   return ureg;

no_decl_temps:
   util_bitmask_destroy(ureg->local_temps);
no_local_temps:
   util_bitmask_destroy(ureg->free_temps);
no_free_temps:
   FREE(ureg);
no_ureg:
   return NULL;
}


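/* Record the pipe shader stage that will consume this program's outputs;
 * ureg_finalize() turns it into the TGSI_PROPERTY_NEXT_SHADER property for
 * VS and TES programs.
 */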
void
ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor)
{
   ureg->next_shader_processor = processor;
}


unsigned
ureg_get_nr_outputs( const struct ureg_program *ureg )
{
   if (!ureg)
      return 0;
   return ureg->nr_outputs;
}

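/* Translate the clip/cull distance array sizes from shader_info into the
 * corresponding TGSI_PROPERTY_NUM_CLIPDIST/CULLDIST_ENABLED properties.
 */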
static void
ureg_setup_clipdist_info(struct ureg_program *ureg,
                         const struct shader_info *info)
{
   if (info->clip_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                    info->clip_distance_array_size);
   if (info->cull_distance_array_size)
      ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                    info->cull_distance_array_size);
}

static void
ureg_setup_tess_ctrl_shader(struct ureg_program *ureg,
                            const struct shader_info *info)
{
   ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
                 info->tess.tcs_vertices_out);
}

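/* Translate tessellation-evaluation state from shader_info to TGSI
 * properties.  GL_ISOLINES is mapped to GL_LINES for TES_PRIM_MODE, and the
 * spacing enum is remapped to the pipe_tess_spacing numbering with the
 * "(spacing + 1) % 3" trick that the STATIC_ASSERTs below verify.
 */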
static void
ureg_setup_tess_eval_shader(struct ureg_program *ureg,
                            const struct shader_info *info)
{
   if (info->tess.primitive_mode == GL_ISOLINES)
      ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, GL_LINES);
   else
      ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE,
                    info->tess.primitive_mode);

   STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
   STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
                 PIPE_TESS_SPACING_FRACTIONAL_ODD);
   STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
                 PIPE_TESS_SPACING_FRACTIONAL_EVEN);

   ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
                 (info->tess.spacing + 1) % 3);

   ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
                 !info->tess.ccw);
   ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
                 info->tess.point_mode);
}

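/* Forward the geometry-shader execution parameters (input/output primitive,
 * max output vertices, invocation count) as TGSI properties.
 */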
static void
ureg_setup_geometry_shader(struct ureg_program *ureg,
                           const struct shader_info *info)
{
   ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
                 info->gs.input_primitive);
   ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
                 info->gs.output_primitive);
   ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
                 info->gs.vertices_out);
   ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
                 info->gs.invocations);
}

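/* Translate fragment-shader state from shader_info to TGSI properties:
 * early depth/stencil and post-depth coverage, the conservative depth
 * layout, and advanced blend equations.
 */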
static void
ureg_setup_fragment_shader(struct ureg_program *ureg,
                           const struct shader_info *info)
{
   if (info->fs.early_fragment_tests || info->fs.post_depth_coverage) {
      ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);

      if (info->fs.post_depth_coverage)
         ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1);
   }

   if (info->fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
      switch (info->fs.depth_layout) {
      case FRAG_DEPTH_LAYOUT_ANY:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_ANY);
         break;
      case FRAG_DEPTH_LAYOUT_GREATER:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_GREATER);
         break;
      case FRAG_DEPTH_LAYOUT_LESS:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_LESS);
         break;
      case FRAG_DEPTH_LAYOUT_UNCHANGED:
         ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
                       TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
         break;
      default:
         assert(0);
      }
   }

   if (info->fs.advanced_blend_modes) {
      ureg_property(ureg, TGSI_PROPERTY_FS_BLEND_EQUATION_ADVANCED,
                    info->fs.advanced_blend_modes);
   }
}

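/* Translate compute-shader state: the fixed workgroup size becomes the
 * CS_FIXED_BLOCK_* properties, and a shared-memory declaration is added
 * when the shader uses shared storage.
 */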
static void
ureg_setup_compute_shader(struct ureg_program *ureg,
                          const struct shader_info *info)
{
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
                 info->workgroup_size[0]);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
                 info->workgroup_size[1]);
   ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
                 info->workgroup_size[2]);

   if (info->shared_size)
      ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
}

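/* Apply the per-stage shader_info translation above (clip distances,
 * tessellation, geometry, fragment and compute properties) to a ureg
 * program, dispatching on info->stage.
 */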
void
ureg_setup_shader_info(struct ureg_program *ureg,
                       const struct shader_info *info)
{
   if (info->layer_viewport_relative)
      ureg_property(ureg, TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE, 1);

   switch (info->stage) {
   case MESA_SHADER_VERTEX:
      ureg_setup_clipdist_info(ureg, info);
      ureg_set_next_shader_processor(ureg, pipe_shader_type_from_mesa(info->next_stage));
      break;
   case MESA_SHADER_TESS_CTRL:
      ureg_setup_tess_ctrl_shader(ureg, info);
      break;
   case MESA_SHADER_TESS_EVAL:
      ureg_setup_tess_eval_shader(ureg, info);
      ureg_setup_clipdist_info(ureg, info);
      ureg_set_next_shader_processor(ureg, pipe_shader_type_from_mesa(info->next_stage));
      break;
   case MESA_SHADER_GEOMETRY:
      ureg_setup_geometry_shader(ureg, info);
      ureg_setup_clipdist_info(ureg, info);
      break;
   case MESA_SHADER_FRAGMENT:
      ureg_setup_fragment_shader(ureg, info);
      break;
   case MESA_SHADER_COMPUTE:
      ureg_setup_compute_shader(ureg, info);
      break;
   default:
      break;
   }
}


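/* Free the ureg program and all token storage that has not been handed to
 * the caller via ureg_get_tokens().
 */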
void ureg_destroy( struct ureg_program *ureg )
{
   unsigned i;

   for (i = 0; i < ARRAY_SIZE(ureg->domain); i++) {
      if (ureg->domain[i].tokens &&
          ureg->domain[i].tokens != error_tokens)
         FREE(ureg->domain[i].tokens);
   }

   util_bitmask_destroy(ureg->free_temps);
   util_bitmask_destroy(ureg->local_temps);
   util_bitmask_destroy(ureg->decl_temps);

   FREE(ureg);
}