1 /**************************************************************************
2 *
3 * Copyright 2009-2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_screen.h"
30 #include "pipe/p_context.h"
31 #include "pipe/p_state.h"
32 #include "tgsi/tgsi_ureg.h"
33 #include "tgsi/tgsi_build.h"
34 #include "tgsi/tgsi_from_mesa.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_dump.h"
37 #include "tgsi/tgsi_sanity.h"
38 #include "util/u_debug.h"
39 #include "util/u_inlines.h"
40 #include "util/u_memory.h"
41 #include "util/u_math.h"
42 #include "util/u_prim.h"
43 #include "util/u_bitmask.h"
44 #include "GL/gl.h"
45 #include "compiler/shader_info.h"
46
/* Overlay of every 32-bit TGSI token layout.  The token streams are built
 * as arrays of this union so a single word can be written through whichever
 * view matches the token being emitted.
 */
union tgsi_any_token {
   struct tgsi_header header;
   struct tgsi_processor processor;
   struct tgsi_token token;
   struct tgsi_property prop;
   struct tgsi_property_data prop_data;
   struct tgsi_declaration decl;
   struct tgsi_declaration_range decl_range;
   struct tgsi_declaration_dimension decl_dim;
   struct tgsi_declaration_interp decl_interp;
   struct tgsi_declaration_image decl_image;
   struct tgsi_declaration_semantic decl_semantic;
   struct tgsi_declaration_sampler_view decl_sampler_view;
   struct tgsi_declaration_array array;
   struct tgsi_immediate imm;
   union  tgsi_immediate_data imm_data;
   struct tgsi_instruction insn;
   struct tgsi_instruction_label insn_label;
   struct tgsi_instruction_texture insn_texture;
   struct tgsi_instruction_memory insn_memory;
   struct tgsi_texture_offset insn_texture_offset;
   struct tgsi_src_register src;
   struct tgsi_ind_register ind;
   struct tgsi_dimension dim;
   struct tgsi_dst_register dst;
   unsigned value;   /* raw access to the 32-bit word */
};
74
75
/* A growable token stream.  Storage doubles on demand (size == 1 << order);
 * 'count' is the number of tokens currently emitted.
 */
struct ureg_tokens {
   union tgsi_any_token *tokens;   /* heap buffer, or error_tokens on failure */
   unsigned size;                  /* capacity in tokens */
   unsigned order;                 /* log2 of size, drives doubling growth */
   unsigned count;                 /* tokens emitted so far */
};
82
/* Hard capacity limits of the ureg builder's bookkeeping arrays.  Exceeding
 * one of these marks the program bad (see set_bad) rather than overflowing.
 */
#define UREG_MAX_INPUT (4 * PIPE_MAX_SHADER_INPUTS)
#define UREG_MAX_SYSTEM_VALUE PIPE_MAX_ATTRIBS
#define UREG_MAX_OUTPUT (4 * PIPE_MAX_SHADER_OUTPUTS)
#define UREG_MAX_CONSTANT_RANGE 32
#define UREG_MAX_HW_ATOMIC_RANGE 32
#define UREG_MAX_IMMEDIATE 4096
#define UREG_MAX_ADDR 3
#define UREG_MAX_ARRAY_TEMPS 256
91
/* Tracks which constant registers of one constant buffer were referenced,
 * as a set of [first, last] ranges, so declarations can be emitted later.
 */
struct const_decl {
   struct {
      unsigned first;
      unsigned last;
   } constant_range[UREG_MAX_CONSTANT_RANGE];
   unsigned nr_constant_ranges;
};
99
/* Tracks referenced HW atomic counter ranges for one atomic buffer,
 * analogous to struct const_decl but carrying a TGSI array id per range.
 */
struct hw_atomic_decl {
   struct hw_atomic_decl_range {
      unsigned first;
      unsigned last;
      unsigned array_id;
   } hw_atomic_range[UREG_MAX_HW_ATOMIC_RANGE];
   unsigned nr_hw_atomic_ranges;
};
108
/* Indices into ureg_program::domain: declarations and instructions are
 * emitted into separate token streams and concatenated at finalize time. */
#define DOMAIN_DECL 0
#define DOMAIN_INSN 1
111
/* A shader program under construction.  All declarations (inputs, outputs,
 * system values, samplers, images, buffers, temporaries, immediates, ...)
 * are accumulated in the arrays below and only turned into TGSI declaration
 * tokens when the program is finalized; instructions are appended directly
 * to domain[DOMAIN_INSN] as they are emitted.
 */
struct ureg_program
{
   enum pipe_shader_type processor;        /* PIPE_SHADER_* stage */
   bool supports_any_inout_decl_range;
   int next_shader_processor;

   /* Non-VS input declarations, merged by (semantic, index, array_id). */
   struct ureg_input_decl {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
      enum tgsi_interpolate_mode interp;
      unsigned char usage_mask;
      enum tgsi_interpolate_loc interp_location;
      unsigned first;                      /* first register of the range */
      unsigned last;                       /* last register (arrays span >1) */
      unsigned array_id;
   } input[UREG_MAX_INPUT];
   unsigned nr_inputs, nr_input_regs;

   /* VS inputs are a simple bitmask of used attribute slots. */
   unsigned vs_inputs[PIPE_MAX_ATTRIBS/32];

   struct {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
   } system_value[UREG_MAX_SYSTEM_VALUE];
   unsigned nr_system_values;

   struct ureg_output_decl {
      enum tgsi_semantic semantic_name;
      unsigned semantic_index;
      unsigned streams;                    /* 2 bits per component */
      unsigned usage_mask; /* = TGSI_WRITEMASK_* */
      unsigned first;
      unsigned last;
      unsigned array_id;
      boolean invariant;
   } output[UREG_MAX_OUTPUT];
   unsigned nr_outputs, nr_output_regs;

   /* Deduplicated immediates; 'nr' counts used components of 'value'. */
   struct {
      union {
         float f[4];
         unsigned u[4];
         int i[4];
      } value;
      unsigned nr;
      unsigned type;                       /* TGSI_IMM_* */
   } immediate[UREG_MAX_IMMEDIATE];
   unsigned nr_immediates;

   struct ureg_src sampler[PIPE_MAX_SAMPLERS];
   unsigned nr_samplers;

   struct {
      unsigned index;
      enum tgsi_texture_type target;
      enum tgsi_return_type return_type_x;
      enum tgsi_return_type return_type_y;
      enum tgsi_return_type return_type_z;
      enum tgsi_return_type return_type_w;
   } sampler_view[PIPE_MAX_SHADER_SAMPLER_VIEWS];
   unsigned nr_sampler_views;

   struct {
      unsigned index;
      enum tgsi_texture_type target;
      enum pipe_format format;
      boolean wr;                          /* writable image */
      boolean raw;                         /* raw (typeless) access */
   } image[PIPE_MAX_SHADER_IMAGES];
   unsigned nr_images;

   struct {
      unsigned index;
      bool atomic;
   } buffer[PIPE_MAX_SHADER_BUFFERS];
   unsigned nr_buffers;

   /* Temporary register allocator state: free set, local flag per temp,
    * and the set of indices where a new declaration range starts. */
   struct util_bitmask *free_temps;
   struct util_bitmask *local_temps;
   struct util_bitmask *decl_temps;
   unsigned nr_temps;

   unsigned array_temps[UREG_MAX_ARRAY_TEMPS];  /* first temp of each array */
   unsigned nr_array_temps;

   struct const_decl const_decls[PIPE_MAX_CONSTANT_BUFFERS];

   struct hw_atomic_decl hw_atomic_decls[PIPE_MAX_HW_ATOMIC_BUFFERS];

   unsigned properties[TGSI_PROPERTY_COUNT];

   unsigned nr_addrs;
   unsigned nr_instructions;

   /* [DOMAIN_DECL] and [DOMAIN_INSN] token streams. */
   struct ureg_tokens domain[2];

   bool use_memory[TGSI_MEMORY_TYPE_COUNT];

   bool precise;                           /* emit instructions as precise */
};
212
213 static union tgsi_any_token error_tokens[32];
214
tokens_error(struct ureg_tokens * tokens)215 static void tokens_error( struct ureg_tokens *tokens )
216 {
217 if (tokens->tokens && tokens->tokens != error_tokens)
218 FREE(tokens->tokens);
219
220 tokens->tokens = error_tokens;
221 tokens->size = ARRAY_SIZE(error_tokens);
222 tokens->count = 0;
223 }
224
225
tokens_expand(struct ureg_tokens * tokens,unsigned count)226 static void tokens_expand( struct ureg_tokens *tokens,
227 unsigned count )
228 {
229 unsigned old_size = tokens->size * sizeof(unsigned);
230
231 if (tokens->tokens == error_tokens) {
232 return;
233 }
234
235 while (tokens->count + count > tokens->size) {
236 tokens->size = (1 << ++tokens->order);
237 }
238
239 tokens->tokens = REALLOC(tokens->tokens,
240 old_size,
241 tokens->size * sizeof(unsigned));
242 if (tokens->tokens == NULL) {
243 tokens_error(tokens);
244 }
245 }
246
set_bad(struct ureg_program * ureg)247 static void set_bad( struct ureg_program *ureg )
248 {
249 tokens_error(&ureg->domain[0]);
250 }
251
252
253
get_tokens(struct ureg_program * ureg,unsigned domain,unsigned count)254 static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
255 unsigned domain,
256 unsigned count )
257 {
258 struct ureg_tokens *tokens = &ureg->domain[domain];
259 union tgsi_any_token *result;
260
261 if (tokens->count + count > tokens->size)
262 tokens_expand(tokens, count);
263
264 result = &tokens->tokens[tokens->count];
265 tokens->count += count;
266 return result;
267 }
268
269
retrieve_token(struct ureg_program * ureg,unsigned domain,unsigned nr)270 static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
271 unsigned domain,
272 unsigned nr )
273 {
274 if (ureg->domain[domain].tokens == error_tokens)
275 return &error_tokens[0];
276
277 return &ureg->domain[domain].tokens[nr];
278 }
279
280
/* Set a TGSI_PROPERTY_* value on the program; properties are emitted as
 * declaration tokens at finalize time.
 */
void
ureg_property(struct ureg_program *ureg, unsigned name, unsigned value)
{
   assert(name < ARRAY_SIZE(ureg->properties));
   ureg->properties[name] = value;
}
287
288 struct ureg_src
ureg_DECL_fs_input_centroid_layout(struct ureg_program * ureg,enum tgsi_semantic semantic_name,unsigned semantic_index,enum tgsi_interpolate_mode interp_mode,enum tgsi_interpolate_loc interp_location,unsigned index,unsigned usage_mask,unsigned array_id,unsigned array_size)289 ureg_DECL_fs_input_centroid_layout(struct ureg_program *ureg,
290 enum tgsi_semantic semantic_name,
291 unsigned semantic_index,
292 enum tgsi_interpolate_mode interp_mode,
293 enum tgsi_interpolate_loc interp_location,
294 unsigned index,
295 unsigned usage_mask,
296 unsigned array_id,
297 unsigned array_size)
298 {
299 unsigned i;
300
301 assert(usage_mask != 0);
302 assert(usage_mask <= TGSI_WRITEMASK_XYZW);
303
304 for (i = 0; i < ureg->nr_inputs; i++) {
305 if (ureg->input[i].semantic_name == semantic_name &&
306 ureg->input[i].semantic_index == semantic_index) {
307 assert(ureg->input[i].interp == interp_mode);
308 assert(ureg->input[i].interp_location == interp_location);
309 if (ureg->input[i].array_id == array_id) {
310 ureg->input[i].usage_mask |= usage_mask;
311 ureg->input[i].last = MAX2(ureg->input[i].last, ureg->input[i].first + array_size - 1);
312 ureg->nr_input_regs = MAX2(ureg->nr_input_regs, ureg->input[i].last + 1);
313 goto out;
314 }
315 assert((ureg->input[i].usage_mask & usage_mask) == 0);
316 }
317 }
318
319 if (ureg->nr_inputs < UREG_MAX_INPUT) {
320 assert(array_size >= 1);
321 ureg->input[i].semantic_name = semantic_name;
322 ureg->input[i].semantic_index = semantic_index;
323 ureg->input[i].interp = interp_mode;
324 ureg->input[i].interp_location = interp_location;
325 ureg->input[i].first = index;
326 ureg->input[i].last = index + array_size - 1;
327 ureg->input[i].array_id = array_id;
328 ureg->input[i].usage_mask = usage_mask;
329 ureg->nr_input_regs = MAX2(ureg->nr_input_regs, index + array_size);
330 ureg->nr_inputs++;
331 } else {
332 set_bad(ureg);
333 }
334
335 out:
336 return ureg_src_array_register(TGSI_FILE_INPUT, ureg->input[i].first,
337 array_id);
338 }
339
340 struct ureg_src
ureg_DECL_fs_input_centroid(struct ureg_program * ureg,enum tgsi_semantic semantic_name,unsigned semantic_index,enum tgsi_interpolate_mode interp_mode,enum tgsi_interpolate_loc interp_location,unsigned array_id,unsigned array_size)341 ureg_DECL_fs_input_centroid(struct ureg_program *ureg,
342 enum tgsi_semantic semantic_name,
343 unsigned semantic_index,
344 enum tgsi_interpolate_mode interp_mode,
345 enum tgsi_interpolate_loc interp_location,
346 unsigned array_id,
347 unsigned array_size)
348 {
349 return ureg_DECL_fs_input_centroid_layout(ureg,
350 semantic_name, semantic_index, interp_mode,
351 interp_location,
352 ureg->nr_input_regs, TGSI_WRITEMASK_XYZW, array_id, array_size);
353 }
354
355
356 struct ureg_src
ureg_DECL_vs_input(struct ureg_program * ureg,unsigned index)357 ureg_DECL_vs_input( struct ureg_program *ureg,
358 unsigned index )
359 {
360 assert(ureg->processor == PIPE_SHADER_VERTEX);
361 assert(index / 32 < ARRAY_SIZE(ureg->vs_inputs));
362
363 ureg->vs_inputs[index/32] |= 1 << (index % 32);
364 return ureg_src_register( TGSI_FILE_INPUT, index );
365 }
366
367
368 struct ureg_src
ureg_DECL_input_layout(struct ureg_program * ureg,enum tgsi_semantic semantic_name,unsigned semantic_index,unsigned index,unsigned usage_mask,unsigned array_id,unsigned array_size)369 ureg_DECL_input_layout(struct ureg_program *ureg,
370 enum tgsi_semantic semantic_name,
371 unsigned semantic_index,
372 unsigned index,
373 unsigned usage_mask,
374 unsigned array_id,
375 unsigned array_size)
376 {
377 return ureg_DECL_fs_input_centroid_layout(ureg,
378 semantic_name, semantic_index,
379 TGSI_INTERPOLATE_CONSTANT, TGSI_INTERPOLATE_LOC_CENTER,
380 index, usage_mask, array_id, array_size);
381 }
382
383
384 struct ureg_src
ureg_DECL_input(struct ureg_program * ureg,enum tgsi_semantic semantic_name,unsigned semantic_index,unsigned array_id,unsigned array_size)385 ureg_DECL_input(struct ureg_program *ureg,
386 enum tgsi_semantic semantic_name,
387 unsigned semantic_index,
388 unsigned array_id,
389 unsigned array_size)
390 {
391 return ureg_DECL_fs_input_centroid(ureg, semantic_name, semantic_index,
392 TGSI_INTERPOLATE_CONSTANT,
393 TGSI_INTERPOLATE_LOC_CENTER,
394 array_id, array_size);
395 }
396
397
398 struct ureg_src
ureg_DECL_system_value(struct ureg_program * ureg,enum tgsi_semantic semantic_name,unsigned semantic_index)399 ureg_DECL_system_value(struct ureg_program *ureg,
400 enum tgsi_semantic semantic_name,
401 unsigned semantic_index)
402 {
403 unsigned i;
404
405 for (i = 0; i < ureg->nr_system_values; i++) {
406 if (ureg->system_value[i].semantic_name == semantic_name &&
407 ureg->system_value[i].semantic_index == semantic_index) {
408 goto out;
409 }
410 }
411
412 if (ureg->nr_system_values < UREG_MAX_SYSTEM_VALUE) {
413 ureg->system_value[ureg->nr_system_values].semantic_name = semantic_name;
414 ureg->system_value[ureg->nr_system_values].semantic_index = semantic_index;
415 i = ureg->nr_system_values;
416 ureg->nr_system_values++;
417 } else {
418 set_bad(ureg);
419 }
420
421 out:
422 return ureg_src_register(TGSI_FILE_SYSTEM_VALUE, i);
423 }
424
425
/* Declare a shader output (or merge into a matching existing declaration)
 * at an explicit register location.  Declarations with the same semantic
 * and array id are merged: usage masks are OR'd and the register range
 * extended.  'streams' (two bits per component) is OR'd into the chosen
 * declaration in all cases.  Returns a destination register referencing
 * the first register of the range.
 */
struct ureg_dst
ureg_DECL_output_layout(struct ureg_program *ureg,
                        enum tgsi_semantic semantic_name,
                        unsigned semantic_index,
                        unsigned streams,
                        unsigned index,
                        unsigned usage_mask,
                        unsigned array_id,
                        unsigned array_size,
                        boolean invariant)
{
   unsigned i;

   assert(usage_mask != 0);
   /* Each component routed to a stream must also be written. */
   assert(!(streams & 0x03) || (usage_mask & 1));
   assert(!(streams & 0x0c) || (usage_mask & 2));
   assert(!(streams & 0x30) || (usage_mask & 4));
   assert(!(streams & 0xc0) || (usage_mask & 8));

   /* Merge with an existing declaration of the same semantic, if any. */
   for (i = 0; i < ureg->nr_outputs; i++) {
      if (ureg->output[i].semantic_name == semantic_name &&
          ureg->output[i].semantic_index == semantic_index) {
         if (ureg->output[i].array_id == array_id) {
            ureg->output[i].usage_mask |= usage_mask;
            ureg->output[i].last = MAX2(ureg->output[i].last, ureg->output[i].first + array_size - 1);
            ureg->nr_output_regs = MAX2(ureg->nr_output_regs, ureg->output[i].last + 1);
            goto out;
         }
         /* Different array ids must not overlap in components. */
         assert((ureg->output[i].usage_mask & usage_mask) == 0);
      }
   }

   if (ureg->nr_outputs < UREG_MAX_OUTPUT) {
      ureg->output[i].semantic_name = semantic_name;
      ureg->output[i].semantic_index = semantic_index;
      ureg->output[i].usage_mask = usage_mask;
      ureg->output[i].first = index;
      ureg->output[i].last = index + array_size - 1;
      ureg->output[i].array_id = array_id;
      ureg->output[i].invariant = invariant;
      ureg->nr_output_regs = MAX2(ureg->nr_output_regs, index + array_size);
      ureg->nr_outputs++;
   }
   else {
      /* Table full: flag the program bad and fall back to slot 0 so the
       * accesses below stay in bounds. */
      set_bad( ureg );
      i = 0;
   }

out:
   ureg->output[i].streams |= streams;

   return ureg_dst_array_register(TGSI_FILE_OUTPUT, ureg->output[i].first,
                                  array_id);
}
480
481
482 struct ureg_dst
ureg_DECL_output_masked(struct ureg_program * ureg,unsigned name,unsigned index,unsigned usage_mask,unsigned array_id,unsigned array_size)483 ureg_DECL_output_masked(struct ureg_program *ureg,
484 unsigned name,
485 unsigned index,
486 unsigned usage_mask,
487 unsigned array_id,
488 unsigned array_size)
489 {
490 return ureg_DECL_output_layout(ureg, name, index, 0,
491 ureg->nr_output_regs, usage_mask, array_id,
492 array_size, FALSE);
493 }
494
495
496 struct ureg_dst
ureg_DECL_output(struct ureg_program * ureg,enum tgsi_semantic name,unsigned index)497 ureg_DECL_output(struct ureg_program *ureg,
498 enum tgsi_semantic name,
499 unsigned index)
500 {
501 return ureg_DECL_output_masked(ureg, name, index, TGSI_WRITEMASK_XYZW,
502 0, 1);
503 }
504
505 struct ureg_dst
ureg_DECL_output_array(struct ureg_program * ureg,enum tgsi_semantic semantic_name,unsigned semantic_index,unsigned array_id,unsigned array_size)506 ureg_DECL_output_array(struct ureg_program *ureg,
507 enum tgsi_semantic semantic_name,
508 unsigned semantic_index,
509 unsigned array_id,
510 unsigned array_size)
511 {
512 return ureg_DECL_output_masked(ureg, semantic_name, semantic_index,
513 TGSI_WRITEMASK_XYZW,
514 array_id, array_size);
515 }
516
517
518 /* Returns a new constant register. Keep track of which have been
519 * referred to so that we can emit decls later.
520 *
521 * Constant operands declared with this function must be addressed
522 * with a two-dimensional index.
523 *
524 * There is nothing in this code to bind this constant to any tracked
525 * value or manage any constant_buffer contents -- that's the
526 * resposibility of the calling code.
527 */
528 void
ureg_DECL_constant2D(struct ureg_program * ureg,unsigned first,unsigned last,unsigned index2D)529 ureg_DECL_constant2D(struct ureg_program *ureg,
530 unsigned first,
531 unsigned last,
532 unsigned index2D)
533 {
534 struct const_decl *decl = &ureg->const_decls[index2D];
535
536 assert(index2D < PIPE_MAX_CONSTANT_BUFFERS);
537
538 if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
539 uint i = decl->nr_constant_ranges++;
540
541 decl->constant_range[i].first = first;
542 decl->constant_range[i].last = last;
543 }
544 }
545
546
547 /* A one-dimensional, deprecated version of ureg_DECL_constant2D().
548 *
549 * Constant operands declared with this function must be addressed
550 * with a one-dimensional index.
551 */
struct ureg_src
ureg_DECL_constant(struct ureg_program *ureg,
                   unsigned index)
{
   struct const_decl *decl = &ureg->const_decls[0];
   unsigned minconst = index, maxconst = index;
   unsigned i;

   /* Inside existing range?
    */
   for (i = 0; i < decl->nr_constant_ranges; i++) {
      if (decl->constant_range[i].first <= index &&
          decl->constant_range[i].last >= index) {
         goto out;
      }
   }

   /* Extend existing range?  (Only by one register in either direction;
    * while scanning, also accumulate the overall min/max across all
    * ranges, used by the collapse fallback below.)
    */
   for (i = 0; i < decl->nr_constant_ranges; i++) {
      if (decl->constant_range[i].last == index - 1) {
         decl->constant_range[i].last = index;
         goto out;
      }

      if (decl->constant_range[i].first == index + 1) {
         decl->constant_range[i].first = index;
         goto out;
      }

      minconst = MIN2(minconst, decl->constant_range[i].first);
      maxconst = MAX2(maxconst, decl->constant_range[i].last);
   }

   /* Create new range?
    */
   if (decl->nr_constant_ranges < UREG_MAX_CONSTANT_RANGE) {
      i = decl->nr_constant_ranges++;
      decl->constant_range[i].first = index;
      decl->constant_range[i].last = index;
      goto out;
   }

   /* Collapse all ranges down to one:  (Range table is full; a single
    * range spanning min..max always covers 'index'.)
    */
   i = 0;
   decl->constant_range[0].first = minconst;
   decl->constant_range[0].last = maxconst;
   decl->nr_constant_ranges = 1;

out:
   assert(i < decl->nr_constant_ranges);
   assert(decl->constant_range[i].first <= index);
   assert(decl->constant_range[i].last >= index);

   /* Address constants through dimension 0 (constant buffer 0). */
   struct ureg_src src = ureg_src_register(TGSI_FILE_CONSTANT, index);
   return ureg_src_dimension(src, 0);
}
610
611
612 /* Returns a new hw atomic register. Keep track of which have been
613 * referred to so that we can emit decls later.
614 */
615 void
ureg_DECL_hw_atomic(struct ureg_program * ureg,unsigned first,unsigned last,unsigned buffer_id,unsigned array_id)616 ureg_DECL_hw_atomic(struct ureg_program *ureg,
617 unsigned first,
618 unsigned last,
619 unsigned buffer_id,
620 unsigned array_id)
621 {
622 struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[buffer_id];
623
624 if (decl->nr_hw_atomic_ranges < UREG_MAX_HW_ATOMIC_RANGE) {
625 uint i = decl->nr_hw_atomic_ranges++;
626
627 decl->hw_atomic_range[i].first = first;
628 decl->hw_atomic_range[i].last = last;
629 decl->hw_atomic_range[i].array_id = array_id;
630 } else {
631 set_bad(ureg);
632 }
633 }
634
/* Allocate a temporary register, preferring a previously released one whose
 * local flag matches; otherwise grow the temporary file by one register.
 */
static struct ureg_dst alloc_temporary( struct ureg_program *ureg,
                                        boolean local )
{
   unsigned i = util_bitmask_get_first_index(ureg->free_temps);

   /* Skip released temporaries whose local flag differs. */
   while (i != UTIL_BITMASK_INVALID_INDEX &&
          util_bitmask_get(ureg->local_temps, i) != local)
      i = util_bitmask_get_next_index(ureg->free_temps, i + 1);

   /* Nothing reusable: allocate a brand new register. */
   if (i == UTIL_BITMASK_INVALID_INDEX) {
      i = ureg->nr_temps++;

      if (local)
         util_bitmask_set(ureg->local_temps, i);

      /* A new declaration range begins whenever the local flag flips. */
      if (!i || util_bitmask_get(ureg->local_temps, i - 1) != local)
         util_bitmask_set(ureg->decl_temps, i);
   }

   util_bitmask_clear(ureg->free_temps, i);

   return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
}
666
ureg_DECL_temporary(struct ureg_program * ureg)667 struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
668 {
669 return alloc_temporary(ureg, FALSE);
670 }
671
ureg_DECL_local_temporary(struct ureg_program * ureg)672 struct ureg_dst ureg_DECL_local_temporary( struct ureg_program *ureg )
673 {
674 return alloc_temporary(ureg, TRUE);
675 }
676
/* Allocate 'size' consecutive temporaries as an indexable array.  The
 * returned destination carries an ArrayID (1-based) when a slot in the
 * array table is still available.
 */
struct ureg_dst ureg_DECL_array_temporary( struct ureg_program *ureg,
                                           unsigned size,
                                           boolean local )
{
   const unsigned first = ureg->nr_temps;
   struct ureg_dst dst = ureg_dst_register( TGSI_FILE_TEMPORARY, first );

   if (local)
      util_bitmask_set(ureg->local_temps, first);

   /* Arrays always open their own declaration range... */
   util_bitmask_set(ureg->decl_temps, first);

   ureg->nr_temps += size;

   /* ...and close it right after the last element. */
   util_bitmask_set(ureg->decl_temps, ureg->nr_temps);

   if (ureg->nr_array_temps < UREG_MAX_ARRAY_TEMPS) {
      ureg->array_temps[ureg->nr_array_temps++] = first;
      dst.ArrayID = ureg->nr_array_temps;
   }

   return dst;
}
702
ureg_release_temporary(struct ureg_program * ureg,struct ureg_dst tmp)703 void ureg_release_temporary( struct ureg_program *ureg,
704 struct ureg_dst tmp )
705 {
706 if(tmp.File == TGSI_FILE_TEMPORARY)
707 util_bitmask_set(ureg->free_temps, tmp.Index);
708 }
709
710
711 /* Allocate a new address register.
712 */
ureg_DECL_address(struct ureg_program * ureg)713 struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
714 {
715 if (ureg->nr_addrs < UREG_MAX_ADDR)
716 return ureg_dst_register( TGSI_FILE_ADDRESS, ureg->nr_addrs++ );
717
718 assert( 0 );
719 return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
720 }
721
722 /* Allocate a new sampler.
723 */
ureg_DECL_sampler(struct ureg_program * ureg,unsigned nr)724 struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg,
725 unsigned nr )
726 {
727 unsigned i;
728
729 for (i = 0; i < ureg->nr_samplers; i++)
730 if (ureg->sampler[i].Index == (int)nr)
731 return ureg->sampler[i];
732
733 if (i < PIPE_MAX_SAMPLERS) {
734 ureg->sampler[i] = ureg_src_register( TGSI_FILE_SAMPLER, nr );
735 ureg->nr_samplers++;
736 return ureg->sampler[i];
737 }
738
739 assert( 0 );
740 return ureg->sampler[0];
741 }
742
743 /*
744 * Allocate a new shader sampler view.
745 */
746 struct ureg_src
ureg_DECL_sampler_view(struct ureg_program * ureg,unsigned index,enum tgsi_texture_type target,enum tgsi_return_type return_type_x,enum tgsi_return_type return_type_y,enum tgsi_return_type return_type_z,enum tgsi_return_type return_type_w)747 ureg_DECL_sampler_view(struct ureg_program *ureg,
748 unsigned index,
749 enum tgsi_texture_type target,
750 enum tgsi_return_type return_type_x,
751 enum tgsi_return_type return_type_y,
752 enum tgsi_return_type return_type_z,
753 enum tgsi_return_type return_type_w)
754 {
755 struct ureg_src reg = ureg_src_register(TGSI_FILE_SAMPLER_VIEW, index);
756 uint i;
757
758 for (i = 0; i < ureg->nr_sampler_views; i++) {
759 if (ureg->sampler_view[i].index == index) {
760 return reg;
761 }
762 }
763
764 if (i < PIPE_MAX_SHADER_SAMPLER_VIEWS) {
765 ureg->sampler_view[i].index = index;
766 ureg->sampler_view[i].target = target;
767 ureg->sampler_view[i].return_type_x = return_type_x;
768 ureg->sampler_view[i].return_type_y = return_type_y;
769 ureg->sampler_view[i].return_type_z = return_type_z;
770 ureg->sampler_view[i].return_type_w = return_type_w;
771 ureg->nr_sampler_views++;
772 return reg;
773 }
774
775 assert(0);
776 return reg;
777 }
778
779 /* Allocate a new image.
780 */
781 struct ureg_src
ureg_DECL_image(struct ureg_program * ureg,unsigned index,enum tgsi_texture_type target,enum pipe_format format,boolean wr,boolean raw)782 ureg_DECL_image(struct ureg_program *ureg,
783 unsigned index,
784 enum tgsi_texture_type target,
785 enum pipe_format format,
786 boolean wr,
787 boolean raw)
788 {
789 struct ureg_src reg = ureg_src_register(TGSI_FILE_IMAGE, index);
790 unsigned i;
791
792 for (i = 0; i < ureg->nr_images; i++)
793 if (ureg->image[i].index == index)
794 return reg;
795
796 if (i < PIPE_MAX_SHADER_IMAGES) {
797 ureg->image[i].index = index;
798 ureg->image[i].target = target;
799 ureg->image[i].wr = wr;
800 ureg->image[i].raw = raw;
801 ureg->image[i].format = format;
802 ureg->nr_images++;
803 return reg;
804 }
805
806 assert(0);
807 return reg;
808 }
809
810 /* Allocate a new buffer.
811 */
/* Declare a shader buffer (SSBO/atomic buffer); an existing declaration
 * with the same index is reused.
 */
struct ureg_src ureg_DECL_buffer(struct ureg_program *ureg, unsigned nr,
                                 bool atomic)
{
   struct ureg_src reg = ureg_src_register(TGSI_FILE_BUFFER, nr);
   unsigned i;

   for (i = 0; i < ureg->nr_buffers; i++) {
      if (ureg->buffer[i].index == nr)
         return reg;
   }

   if (i >= PIPE_MAX_SHADER_BUFFERS) {
      /* Table full: caller bug. */
      assert(0);
      return reg;
   }

   ureg->buffer[i].index = nr;
   ureg->buffer[i].atomic = atomic;
   ureg->nr_buffers++;
   return reg;
}
832
833 /* Allocate a memory area.
834 */
ureg_DECL_memory(struct ureg_program * ureg,unsigned memory_type)835 struct ureg_src ureg_DECL_memory(struct ureg_program *ureg,
836 unsigned memory_type)
837 {
838 struct ureg_src reg = ureg_src_register(TGSI_FILE_MEMORY, memory_type);
839
840 ureg->use_memory[memory_type] = true;
841 return reg;
842 }
843
/* Try to fit the 'nr' 32-bit words of a 64-bit immediate (consumed as
 * word pairs) into the existing immediate channels v2[0..*pnr2), appending
 * new pairs when there is room.  On success, *swizzle holds two bits per
 * source word selecting the channel it was placed in, and *pnr2 is updated.
 * Returns FALSE when a pair could not be matched and no free channel pair
 * remained (in which case channels already appended remain in v2 but *pnr2
 * is not updated).
 */
static int
match_or_expand_immediate64( const unsigned *v,
                             unsigned nr,
                             unsigned *v2,
                             unsigned *pnr2,
                             unsigned *swizzle )
{
   unsigned nr2 = *pnr2;
   unsigned i, j;
   *swizzle = 0;

   /* Walk the input two words (one 64-bit value) at a time. */
   for (i = 0; i < nr; i += 2) {
      boolean found = FALSE;

      /* Look for an existing channel pair holding the same 64-bit value. */
      for (j = 0; j < nr2 && !found; j += 2) {
         if (v[i] == v2[j] && v[i + 1] == v2[j + 1]) {
            *swizzle |= (j << (i * 2)) | ((j + 1) << ((i + 1) * 2));
            found = TRUE;
         }
      }
      if (!found) {
         if ((nr2) >= 4) {
            /* All four channels occupied - cannot place this pair. */
            return FALSE;
         }

         /* Append the pair in the next free channel pair. */
         v2[nr2] = v[i];
         v2[nr2 + 1] = v[i + 1];

         *swizzle |= (nr2 << (i * 2)) | ((nr2 + 1) << ((i + 1) * 2));
         nr2 += 2;
      }
   }

   /* Actually expand immediate only when fully succeeded.
    */
   *pnr2 = nr2;
   return TRUE;
}
882
883 static int
match_or_expand_immediate(const unsigned * v,int type,unsigned nr,unsigned * v2,unsigned * pnr2,unsigned * swizzle)884 match_or_expand_immediate( const unsigned *v,
885 int type,
886 unsigned nr,
887 unsigned *v2,
888 unsigned *pnr2,
889 unsigned *swizzle )
890 {
891 unsigned nr2 = *pnr2;
892 unsigned i, j;
893
894 if (type == TGSI_IMM_FLOAT64 ||
895 type == TGSI_IMM_UINT64 ||
896 type == TGSI_IMM_INT64)
897 return match_or_expand_immediate64(v, nr, v2, pnr2, swizzle);
898
899 *swizzle = 0;
900
901 for (i = 0; i < nr; i++) {
902 boolean found = FALSE;
903
904 for (j = 0; j < nr2 && !found; j++) {
905 if (v[i] == v2[j]) {
906 *swizzle |= j << (i * 2);
907 found = TRUE;
908 }
909 }
910
911 if (!found) {
912 if (nr2 >= 4) {
913 return FALSE;
914 }
915
916 v2[nr2] = v[i];
917 *swizzle |= nr2 << (i * 2);
918 nr2++;
919 }
920 }
921
922 /* Actually expand immediate only when fully succeeded.
923 */
924 *pnr2 = nr2;
925 return TRUE;
926 }
927
928
/* Find or create an immediate holding the 'nr' words in v (interpreted per
 * 'type'), packing values into existing immediates where possible.  Returns
 * a swizzled source register selecting the requested components; unused
 * trailing components replicate the declared ones so a size-one immediate
 * behaves as a scalar.
 */
static struct ureg_src
decl_immediate( struct ureg_program *ureg,
                const unsigned *v,
                unsigned nr,
                unsigned type )
{
   unsigned i, j;
   unsigned swizzle = 0;

   /* Could do a first pass where we examine all existing immediates
    * without expanding.
    */

   /* Try to place the value into an existing immediate of the same type. */
   for (i = 0; i < ureg->nr_immediates; i++) {
      if (ureg->immediate[i].type != type) {
         continue;
      }
      if (match_or_expand_immediate(v,
                                    type,
                                    nr,
                                    ureg->immediate[i].value.u,
                                    &ureg->immediate[i].nr,
                                    &swizzle)) {
         goto out;
      }
   }

   /* Otherwise start a fresh immediate slot. */
   if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
      i = ureg->nr_immediates++;
      ureg->immediate[i].type = type;
      if (match_or_expand_immediate(v,
                                    type,
                                    nr,
                                    ureg->immediate[i].value.u,
                                    &ureg->immediate[i].nr,
                                    &swizzle)) {
         goto out;
      }
   }

   /* No slot left: the program is unusable. */
   set_bad(ureg);

out:
   /* Make sure that all referenced elements are from this immediate.
    * Has the effect of making size-one immediates into scalars.
    */
   if (type == TGSI_IMM_FLOAT64 ||
       type == TGSI_IMM_UINT64 ||
       type == TGSI_IMM_INT64) {
      /* 64-bit: replicate the first channel pair (4 swizzle bits). */
      for (j = nr; j < 4; j+=2) {
         swizzle |= (swizzle & 0xf) << (j * 2);
      }
   } else {
      /* 32-bit: replicate the first channel (2 swizzle bits). */
      for (j = nr; j < 4; j++) {
         swizzle |= (swizzle & 0x3) << (j * 2);
      }
   }
   return ureg_swizzle(ureg_src_register(TGSI_FILE_IMMEDIATE, i),
                       (swizzle >> 0) & 0x3,
                       (swizzle >> 2) & 0x3,
                       (swizzle >> 4) & 0x3,
                       (swizzle >> 6) & 0x3);
}
992
993
994 struct ureg_src
ureg_DECL_immediate(struct ureg_program * ureg,const float * v,unsigned nr)995 ureg_DECL_immediate( struct ureg_program *ureg,
996 const float *v,
997 unsigned nr )
998 {
999 union {
1000 float f[4];
1001 unsigned u[4];
1002 } fu;
1003 unsigned int i;
1004
1005 for (i = 0; i < nr; i++) {
1006 fu.f[i] = v[i];
1007 }
1008
1009 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT32);
1010 }
1011
1012 struct ureg_src
ureg_DECL_immediate_f64(struct ureg_program * ureg,const double * v,unsigned nr)1013 ureg_DECL_immediate_f64( struct ureg_program *ureg,
1014 const double *v,
1015 unsigned nr )
1016 {
1017 union {
1018 unsigned u[4];
1019 double d[2];
1020 } fu;
1021 unsigned int i;
1022
1023 assert((nr / 2) < 3);
1024 for (i = 0; i < nr / 2; i++) {
1025 fu.d[i] = v[i];
1026 }
1027
1028 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_FLOAT64);
1029 }
1030
1031 struct ureg_src
ureg_DECL_immediate_uint(struct ureg_program * ureg,const unsigned * v,unsigned nr)1032 ureg_DECL_immediate_uint( struct ureg_program *ureg,
1033 const unsigned *v,
1034 unsigned nr )
1035 {
1036 return decl_immediate(ureg, v, nr, TGSI_IMM_UINT32);
1037 }
1038
1039
1040 struct ureg_src
ureg_DECL_immediate_block_uint(struct ureg_program * ureg,const unsigned * v,unsigned nr)1041 ureg_DECL_immediate_block_uint( struct ureg_program *ureg,
1042 const unsigned *v,
1043 unsigned nr )
1044 {
1045 uint index;
1046 uint i;
1047
1048 if (ureg->nr_immediates + (nr + 3) / 4 > UREG_MAX_IMMEDIATE) {
1049 set_bad(ureg);
1050 return ureg_src_register(TGSI_FILE_IMMEDIATE, 0);
1051 }
1052
1053 index = ureg->nr_immediates;
1054 ureg->nr_immediates += (nr + 3) / 4;
1055
1056 for (i = index; i < ureg->nr_immediates; i++) {
1057 ureg->immediate[i].type = TGSI_IMM_UINT32;
1058 ureg->immediate[i].nr = nr > 4 ? 4 : nr;
1059 memcpy(ureg->immediate[i].value.u,
1060 &v[(i - index) * 4],
1061 ureg->immediate[i].nr * sizeof(uint));
1062 nr -= 4;
1063 }
1064
1065 return ureg_src_register(TGSI_FILE_IMMEDIATE, index);
1066 }
1067
1068
1069 struct ureg_src
ureg_DECL_immediate_int(struct ureg_program * ureg,const int * v,unsigned nr)1070 ureg_DECL_immediate_int( struct ureg_program *ureg,
1071 const int *v,
1072 unsigned nr )
1073 {
1074 return decl_immediate(ureg, (const unsigned *)v, nr, TGSI_IMM_INT32);
1075 }
1076
1077 struct ureg_src
ureg_DECL_immediate_uint64(struct ureg_program * ureg,const uint64_t * v,unsigned nr)1078 ureg_DECL_immediate_uint64( struct ureg_program *ureg,
1079 const uint64_t *v,
1080 unsigned nr )
1081 {
1082 union {
1083 unsigned u[4];
1084 uint64_t u64[2];
1085 } fu;
1086 unsigned int i;
1087
1088 assert((nr / 2) < 3);
1089 for (i = 0; i < nr / 2; i++) {
1090 fu.u64[i] = v[i];
1091 }
1092
1093 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_UINT64);
1094 }
1095
1096 struct ureg_src
ureg_DECL_immediate_int64(struct ureg_program * ureg,const int64_t * v,unsigned nr)1097 ureg_DECL_immediate_int64( struct ureg_program *ureg,
1098 const int64_t *v,
1099 unsigned nr )
1100 {
1101 union {
1102 unsigned u[4];
1103 int64_t i64[2];
1104 } fu;
1105 unsigned int i;
1106
1107 assert((nr / 2) < 3);
1108 for (i = 0; i < nr / 2; i++) {
1109 fu.i64[i] = v[i];
1110 }
1111
1112 return decl_immediate(ureg, fu.u, nr, TGSI_IMM_INT64);
1113 }
1114
/**
 * Append the token(s) for one source operand to the instruction stream.
 *
 * Emits one base token, plus one extra token when the register is
 * indirectly addressed, plus one or two tokens for a dimensioned (2-D)
 * register depending on whether the second dimension is itself indirect.
 */
void
ureg_emit_src( struct ureg_program *ureg,
               struct ureg_src src )
{
   /* Must match exactly the number of tokens written below. */
   unsigned size = 1 + (src.Indirect ? 1 : 0) +
                   (src.Dimension ? (src.DimIndirect ? 2 : 1) : 0);

   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
   unsigned n = 0;

   assert(src.File != TGSI_FILE_NULL);
   assert(src.File < TGSI_FILE_COUNT);

   /* Base source-register token. */
   out[n].value = 0;
   out[n].src.File = src.File;
   out[n].src.SwizzleX = src.SwizzleX;
   out[n].src.SwizzleY = src.SwizzleY;
   out[n].src.SwizzleZ = src.SwizzleZ;
   out[n].src.SwizzleW = src.SwizzleW;
   out[n].src.Index = src.Index;
   out[n].src.Negate = src.Negate;
   out[0].src.Absolute = src.Absolute;
   n++;

   /* Optional indirect-addressing token; also flags the base token. */
   if (src.Indirect) {
      out[0].src.Indirect = 1;
      out[n].value = 0;
      out[n].ind.File = src.IndirectFile;
      out[n].ind.Swizzle = src.IndirectSwizzle;
      out[n].ind.Index = src.IndirectIndex;
      /* ArrayIDs on input/output registers are only emitted when the
       * driver supports arbitrary in/out declaration ranges.
       */
      if (!ureg->supports_any_inout_decl_range &&
          (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
         out[n].ind.ArrayID = 0;
      else
         out[n].ind.ArrayID = src.ArrayID;
      n++;
   }

   /* Optional second-dimension token(s) for 2-D registers. */
   if (src.Dimension) {
      out[0].src.Dimension = 1;
      out[n].dim.Dimension = 0;
      out[n].dim.Padding = 0;
      if (src.DimIndirect) {
         /* Dimension token followed by its own indirect token. */
         out[n].dim.Indirect = 1;
         out[n].dim.Index = src.DimensionIndex;
         n++;
         out[n].value = 0;
         out[n].ind.File = src.DimIndFile;
         out[n].ind.Swizzle = src.DimIndSwizzle;
         out[n].ind.Index = src.DimIndIndex;
         if (!ureg->supports_any_inout_decl_range &&
             (src.File == TGSI_FILE_INPUT || src.File == TGSI_FILE_OUTPUT))
            out[n].ind.ArrayID = 0;
         else
            out[n].ind.ArrayID = src.ArrayID;
      } else {
         out[n].dim.Indirect = 0;
         out[n].dim.Index = src.DimensionIndex;
      }
      n++;
   }

   assert(n == size);
}
1179
1180
/**
 * Append the token(s) for one destination operand to the instruction
 * stream.  Token layout mirrors ureg_emit_src(): one base token, plus
 * optional indirect and dimension tokens.
 */
void
ureg_emit_dst( struct ureg_program *ureg,
               struct ureg_dst dst )
{
   /* Must match exactly the number of tokens written below. */
   unsigned size = 1 + (dst.Indirect ? 1 : 0) +
                   (dst.Dimension ? (dst.DimIndirect ? 2 : 1) : 0);

   union tgsi_any_token *out = get_tokens( ureg, DOMAIN_INSN, size );
   unsigned n = 0;

   assert(dst.File != TGSI_FILE_NULL);
   assert(dst.File != TGSI_FILE_SAMPLER);
   assert(dst.File != TGSI_FILE_SAMPLER_VIEW);
   assert(dst.File != TGSI_FILE_IMMEDIATE);
   assert(dst.File < TGSI_FILE_COUNT);

   /* Base destination-register token. */
   out[n].value = 0;
   out[n].dst.File = dst.File;
   out[n].dst.WriteMask = dst.WriteMask;
   out[n].dst.Indirect = dst.Indirect;
   out[n].dst.Index = dst.Index;
   n++;

   /* Optional indirect-addressing token. */
   if (dst.Indirect) {
      out[n].value = 0;
      out[n].ind.File = dst.IndirectFile;
      out[n].ind.Swizzle = dst.IndirectSwizzle;
      out[n].ind.Index = dst.IndirectIndex;
      /* ArrayIDs on input/output registers are only emitted when the
       * driver supports arbitrary in/out declaration ranges.
       */
      if (!ureg->supports_any_inout_decl_range &&
          (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
         out[n].ind.ArrayID = 0;
      else
         out[n].ind.ArrayID = dst.ArrayID;
      n++;
   }

   /* Optional second-dimension token(s) for 2-D registers. */
   if (dst.Dimension) {
      out[0].dst.Dimension = 1;
      out[n].dim.Dimension = 0;
      out[n].dim.Padding = 0;
      if (dst.DimIndirect) {
         /* Dimension token followed by its own indirect token. */
         out[n].dim.Indirect = 1;
         out[n].dim.Index = dst.DimensionIndex;
         n++;
         out[n].value = 0;
         out[n].ind.File = dst.DimIndFile;
         out[n].ind.Swizzle = dst.DimIndSwizzle;
         out[n].ind.Index = dst.DimIndIndex;
         if (!ureg->supports_any_inout_decl_range &&
             (dst.File == TGSI_FILE_INPUT || dst.File == TGSI_FILE_OUTPUT))
            out[n].ind.ArrayID = 0;
         else
            out[n].ind.ArrayID = dst.ArrayID;
      } else {
         out[n].dim.Indirect = 0;
         out[n].dim.Index = dst.DimensionIndex;
      }
      n++;
   }

   assert(n == size);
}
1243
1244
validate(enum tgsi_opcode opcode,unsigned nr_dst,unsigned nr_src)1245 static void validate( enum tgsi_opcode opcode,
1246 unsigned nr_dst,
1247 unsigned nr_src )
1248 {
1249 #ifndef NDEBUG
1250 const struct tgsi_opcode_info *info = tgsi_get_opcode_info( opcode );
1251 assert(info);
1252 if (info) {
1253 assert(nr_dst == info->num_dst);
1254 assert(nr_src == info->num_src);
1255 }
1256 #endif
1257 }
1258
1259 struct ureg_emit_insn_result
ureg_emit_insn(struct ureg_program * ureg,enum tgsi_opcode opcode,boolean saturate,unsigned precise,unsigned num_dst,unsigned num_src)1260 ureg_emit_insn(struct ureg_program *ureg,
1261 enum tgsi_opcode opcode,
1262 boolean saturate,
1263 unsigned precise,
1264 unsigned num_dst,
1265 unsigned num_src)
1266 {
1267 union tgsi_any_token *out;
1268 uint count = 1;
1269 struct ureg_emit_insn_result result;
1270
1271 validate( opcode, num_dst, num_src );
1272
1273 out = get_tokens( ureg, DOMAIN_INSN, count );
1274 out[0].insn = tgsi_default_instruction();
1275 out[0].insn.Opcode = opcode;
1276 out[0].insn.Saturate = saturate;
1277 out[0].insn.Precise = precise || ureg->precise;
1278 out[0].insn.NumDstRegs = num_dst;
1279 out[0].insn.NumSrcRegs = num_src;
1280
1281 result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
1282 result.extended_token = result.insn_token;
1283
1284 ureg->nr_instructions++;
1285
1286 return result;
1287 }
1288
1289
1290 /**
1291 * Emit a label token.
1292 * \param label_token returns a token number indicating where the label
1293 * needs to be patched later. Later, this value should be passed to the
1294 * ureg_fixup_label() function.
1295 */
1296 void
ureg_emit_label(struct ureg_program * ureg,unsigned extended_token,unsigned * label_token)1297 ureg_emit_label(struct ureg_program *ureg,
1298 unsigned extended_token,
1299 unsigned *label_token )
1300 {
1301 union tgsi_any_token *out, *insn;
1302
1303 if (!label_token)
1304 return;
1305
1306 out = get_tokens( ureg, DOMAIN_INSN, 1 );
1307 out[0].value = 0;
1308
1309 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1310 insn->insn.Label = 1;
1311
1312 *label_token = ureg->domain[DOMAIN_INSN].count - 1;
1313 }
1314
1315 /* Will return a number which can be used in a label to point to the
1316 * next instruction to be emitted.
1317 */
1318 unsigned
ureg_get_instruction_number(struct ureg_program * ureg)1319 ureg_get_instruction_number( struct ureg_program *ureg )
1320 {
1321 return ureg->nr_instructions;
1322 }
1323
1324 /* Patch a given label (expressed as a token number) to point to a
1325 * given instruction (expressed as an instruction number).
1326 */
1327 void
ureg_fixup_label(struct ureg_program * ureg,unsigned label_token,unsigned instruction_number)1328 ureg_fixup_label(struct ureg_program *ureg,
1329 unsigned label_token,
1330 unsigned instruction_number )
1331 {
1332 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
1333
1334 out->insn_label.Label = instruction_number;
1335 }
1336
1337
1338 void
ureg_emit_texture(struct ureg_program * ureg,unsigned extended_token,enum tgsi_texture_type target,enum tgsi_return_type return_type,unsigned num_offsets)1339 ureg_emit_texture(struct ureg_program *ureg,
1340 unsigned extended_token,
1341 enum tgsi_texture_type target,
1342 enum tgsi_return_type return_type, unsigned num_offsets)
1343 {
1344 union tgsi_any_token *out, *insn;
1345
1346 out = get_tokens( ureg, DOMAIN_INSN, 1 );
1347 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1348
1349 insn->insn.Texture = 1;
1350
1351 out[0].value = 0;
1352 out[0].insn_texture.Texture = target;
1353 out[0].insn_texture.NumOffsets = num_offsets;
1354 out[0].insn_texture.ReturnType = return_type;
1355 }
1356
1357 void
ureg_emit_texture_offset(struct ureg_program * ureg,const struct tgsi_texture_offset * offset)1358 ureg_emit_texture_offset(struct ureg_program *ureg,
1359 const struct tgsi_texture_offset *offset)
1360 {
1361 union tgsi_any_token *out;
1362
1363 out = get_tokens( ureg, DOMAIN_INSN, 1);
1364
1365 out[0].value = 0;
1366 out[0].insn_texture_offset = *offset;
1367 }
1368
1369 void
ureg_emit_memory(struct ureg_program * ureg,unsigned extended_token,unsigned qualifier,enum tgsi_texture_type texture,enum pipe_format format)1370 ureg_emit_memory(struct ureg_program *ureg,
1371 unsigned extended_token,
1372 unsigned qualifier,
1373 enum tgsi_texture_type texture,
1374 enum pipe_format format)
1375 {
1376 union tgsi_any_token *out, *insn;
1377
1378 out = get_tokens( ureg, DOMAIN_INSN, 1 );
1379 insn = retrieve_token( ureg, DOMAIN_INSN, extended_token );
1380
1381 insn->insn.Memory = 1;
1382
1383 out[0].value = 0;
1384 out[0].insn_memory.Qualifier = qualifier;
1385 out[0].insn_memory.Texture = texture;
1386 out[0].insn_memory.Format = format;
1387 }
1388
1389 void
ureg_fixup_insn_size(struct ureg_program * ureg,unsigned insn)1390 ureg_fixup_insn_size(struct ureg_program *ureg,
1391 unsigned insn )
1392 {
1393 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
1394
1395 assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
1396 out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
1397 }
1398
1399
1400 void
ureg_insn(struct ureg_program * ureg,enum tgsi_opcode opcode,const struct ureg_dst * dst,unsigned nr_dst,const struct ureg_src * src,unsigned nr_src,unsigned precise)1401 ureg_insn(struct ureg_program *ureg,
1402 enum tgsi_opcode opcode,
1403 const struct ureg_dst *dst,
1404 unsigned nr_dst,
1405 const struct ureg_src *src,
1406 unsigned nr_src,
1407 unsigned precise )
1408 {
1409 struct ureg_emit_insn_result insn;
1410 unsigned i;
1411 boolean saturate;
1412
1413 if (nr_dst && ureg_dst_is_empty(dst[0])) {
1414 return;
1415 }
1416
1417 saturate = nr_dst ? dst[0].Saturate : FALSE;
1418
1419 insn = ureg_emit_insn(ureg,
1420 opcode,
1421 saturate,
1422 precise,
1423 nr_dst,
1424 nr_src);
1425
1426 for (i = 0; i < nr_dst; i++)
1427 ureg_emit_dst( ureg, dst[i] );
1428
1429 for (i = 0; i < nr_src; i++)
1430 ureg_emit_src( ureg, src[i] );
1431
1432 ureg_fixup_insn_size( ureg, insn.insn_token );
1433 }
1434
1435 void
ureg_tex_insn(struct ureg_program * ureg,enum tgsi_opcode opcode,const struct ureg_dst * dst,unsigned nr_dst,enum tgsi_texture_type target,enum tgsi_return_type return_type,const struct tgsi_texture_offset * texoffsets,unsigned nr_offset,const struct ureg_src * src,unsigned nr_src)1436 ureg_tex_insn(struct ureg_program *ureg,
1437 enum tgsi_opcode opcode,
1438 const struct ureg_dst *dst,
1439 unsigned nr_dst,
1440 enum tgsi_texture_type target,
1441 enum tgsi_return_type return_type,
1442 const struct tgsi_texture_offset *texoffsets,
1443 unsigned nr_offset,
1444 const struct ureg_src *src,
1445 unsigned nr_src )
1446 {
1447 struct ureg_emit_insn_result insn;
1448 unsigned i;
1449 boolean saturate;
1450
1451 if (nr_dst && ureg_dst_is_empty(dst[0])) {
1452 return;
1453 }
1454
1455 saturate = nr_dst ? dst[0].Saturate : FALSE;
1456
1457 insn = ureg_emit_insn(ureg,
1458 opcode,
1459 saturate,
1460 0,
1461 nr_dst,
1462 nr_src);
1463
1464 ureg_emit_texture( ureg, insn.extended_token, target, return_type,
1465 nr_offset );
1466
1467 for (i = 0; i < nr_offset; i++)
1468 ureg_emit_texture_offset( ureg, &texoffsets[i]);
1469
1470 for (i = 0; i < nr_dst; i++)
1471 ureg_emit_dst( ureg, dst[i] );
1472
1473 for (i = 0; i < nr_src; i++)
1474 ureg_emit_src( ureg, src[i] );
1475
1476 ureg_fixup_insn_size( ureg, insn.insn_token );
1477 }
1478
1479
1480 void
ureg_memory_insn(struct ureg_program * ureg,enum tgsi_opcode opcode,const struct ureg_dst * dst,unsigned nr_dst,const struct ureg_src * src,unsigned nr_src,unsigned qualifier,enum tgsi_texture_type texture,enum pipe_format format)1481 ureg_memory_insn(struct ureg_program *ureg,
1482 enum tgsi_opcode opcode,
1483 const struct ureg_dst *dst,
1484 unsigned nr_dst,
1485 const struct ureg_src *src,
1486 unsigned nr_src,
1487 unsigned qualifier,
1488 enum tgsi_texture_type texture,
1489 enum pipe_format format)
1490 {
1491 struct ureg_emit_insn_result insn;
1492 unsigned i;
1493
1494 insn = ureg_emit_insn(ureg,
1495 opcode,
1496 FALSE,
1497 0,
1498 nr_dst,
1499 nr_src);
1500
1501 ureg_emit_memory(ureg, insn.extended_token, qualifier, texture, format);
1502
1503 for (i = 0; i < nr_dst; i++)
1504 ureg_emit_dst(ureg, dst[i]);
1505
1506 for (i = 0; i < nr_src; i++)
1507 ureg_emit_src(ureg, src[i]);
1508
1509 ureg_fixup_insn_size(ureg, insn.insn_token);
1510 }
1511
1512
1513 static void
emit_decl_semantic(struct ureg_program * ureg,unsigned file,unsigned first,unsigned last,enum tgsi_semantic semantic_name,unsigned semantic_index,unsigned streams,unsigned usage_mask,unsigned array_id,boolean invariant)1514 emit_decl_semantic(struct ureg_program *ureg,
1515 unsigned file,
1516 unsigned first,
1517 unsigned last,
1518 enum tgsi_semantic semantic_name,
1519 unsigned semantic_index,
1520 unsigned streams,
1521 unsigned usage_mask,
1522 unsigned array_id,
1523 boolean invariant)
1524 {
1525 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
1526
1527 out[0].value = 0;
1528 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1529 out[0].decl.NrTokens = 3;
1530 out[0].decl.File = file;
1531 out[0].decl.UsageMask = usage_mask;
1532 out[0].decl.Semantic = 1;
1533 out[0].decl.Array = array_id != 0;
1534 out[0].decl.Invariant = invariant;
1535
1536 out[1].value = 0;
1537 out[1].decl_range.First = first;
1538 out[1].decl_range.Last = last;
1539
1540 out[2].value = 0;
1541 out[2].decl_semantic.Name = semantic_name;
1542 out[2].decl_semantic.Index = semantic_index;
1543 out[2].decl_semantic.StreamX = streams & 3;
1544 out[2].decl_semantic.StreamY = (streams >> 2) & 3;
1545 out[2].decl_semantic.StreamZ = (streams >> 4) & 3;
1546 out[2].decl_semantic.StreamW = (streams >> 6) & 3;
1547
1548 if (array_id) {
1549 out[3].value = 0;
1550 out[3].array.ArrayID = array_id;
1551 }
1552 }
1553
1554 static void
emit_decl_atomic_2d(struct ureg_program * ureg,unsigned first,unsigned last,unsigned index2D,unsigned array_id)1555 emit_decl_atomic_2d(struct ureg_program *ureg,
1556 unsigned first,
1557 unsigned last,
1558 unsigned index2D,
1559 unsigned array_id)
1560 {
1561 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, array_id ? 4 : 3);
1562
1563 out[0].value = 0;
1564 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1565 out[0].decl.NrTokens = 3;
1566 out[0].decl.File = TGSI_FILE_HW_ATOMIC;
1567 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1568 out[0].decl.Dimension = 1;
1569 out[0].decl.Array = array_id != 0;
1570
1571 out[1].value = 0;
1572 out[1].decl_range.First = first;
1573 out[1].decl_range.Last = last;
1574
1575 out[2].value = 0;
1576 out[2].decl_dim.Index2D = index2D;
1577
1578 if (array_id) {
1579 out[3].value = 0;
1580 out[3].array.ArrayID = array_id;
1581 }
1582 }
1583
1584 static void
emit_decl_fs(struct ureg_program * ureg,unsigned file,unsigned first,unsigned last,enum tgsi_semantic semantic_name,unsigned semantic_index,enum tgsi_interpolate_mode interpolate,enum tgsi_interpolate_loc interpolate_location,unsigned array_id,unsigned usage_mask)1585 emit_decl_fs(struct ureg_program *ureg,
1586 unsigned file,
1587 unsigned first,
1588 unsigned last,
1589 enum tgsi_semantic semantic_name,
1590 unsigned semantic_index,
1591 enum tgsi_interpolate_mode interpolate,
1592 enum tgsi_interpolate_loc interpolate_location,
1593 unsigned array_id,
1594 unsigned usage_mask)
1595 {
1596 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL,
1597 array_id ? 5 : 4);
1598
1599 out[0].value = 0;
1600 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1601 out[0].decl.NrTokens = 4;
1602 out[0].decl.File = file;
1603 out[0].decl.UsageMask = usage_mask;
1604 out[0].decl.Interpolate = 1;
1605 out[0].decl.Semantic = 1;
1606 out[0].decl.Array = array_id != 0;
1607
1608 out[1].value = 0;
1609 out[1].decl_range.First = first;
1610 out[1].decl_range.Last = last;
1611
1612 out[2].value = 0;
1613 out[2].decl_interp.Interpolate = interpolate;
1614 out[2].decl_interp.Location = interpolate_location;
1615
1616 out[3].value = 0;
1617 out[3].decl_semantic.Name = semantic_name;
1618 out[3].decl_semantic.Index = semantic_index;
1619
1620 if (array_id) {
1621 out[4].value = 0;
1622 out[4].array.ArrayID = array_id;
1623 }
1624 }
1625
1626 static void
emit_decl_temps(struct ureg_program * ureg,unsigned first,unsigned last,boolean local,unsigned arrayid)1627 emit_decl_temps( struct ureg_program *ureg,
1628 unsigned first, unsigned last,
1629 boolean local,
1630 unsigned arrayid )
1631 {
1632 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL,
1633 arrayid ? 3 : 2 );
1634
1635 out[0].value = 0;
1636 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1637 out[0].decl.NrTokens = 2;
1638 out[0].decl.File = TGSI_FILE_TEMPORARY;
1639 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1640 out[0].decl.Local = local;
1641
1642 out[1].value = 0;
1643 out[1].decl_range.First = first;
1644 out[1].decl_range.Last = last;
1645
1646 if (arrayid) {
1647 out[0].decl.Array = 1;
1648 out[2].value = 0;
1649 out[2].array.ArrayID = arrayid;
1650 }
1651 }
1652
emit_decl_range(struct ureg_program * ureg,unsigned file,unsigned first,unsigned count)1653 static void emit_decl_range( struct ureg_program *ureg,
1654 unsigned file,
1655 unsigned first,
1656 unsigned count )
1657 {
1658 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
1659
1660 out[0].value = 0;
1661 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1662 out[0].decl.NrTokens = 2;
1663 out[0].decl.File = file;
1664 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1665 out[0].decl.Semantic = 0;
1666
1667 out[1].value = 0;
1668 out[1].decl_range.First = first;
1669 out[1].decl_range.Last = first + count - 1;
1670 }
1671
1672 static void
emit_decl_range2D(struct ureg_program * ureg,unsigned file,unsigned first,unsigned last,unsigned index2D)1673 emit_decl_range2D(struct ureg_program *ureg,
1674 unsigned file,
1675 unsigned first,
1676 unsigned last,
1677 unsigned index2D)
1678 {
1679 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1680
1681 out[0].value = 0;
1682 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1683 out[0].decl.NrTokens = 3;
1684 out[0].decl.File = file;
1685 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1686 out[0].decl.Dimension = 1;
1687
1688 out[1].value = 0;
1689 out[1].decl_range.First = first;
1690 out[1].decl_range.Last = last;
1691
1692 out[2].value = 0;
1693 out[2].decl_dim.Index2D = index2D;
1694 }
1695
1696 static void
emit_decl_sampler_view(struct ureg_program * ureg,unsigned index,enum tgsi_texture_type target,enum tgsi_return_type return_type_x,enum tgsi_return_type return_type_y,enum tgsi_return_type return_type_z,enum tgsi_return_type return_type_w)1697 emit_decl_sampler_view(struct ureg_program *ureg,
1698 unsigned index,
1699 enum tgsi_texture_type target,
1700 enum tgsi_return_type return_type_x,
1701 enum tgsi_return_type return_type_y,
1702 enum tgsi_return_type return_type_z,
1703 enum tgsi_return_type return_type_w )
1704 {
1705 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1706
1707 out[0].value = 0;
1708 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1709 out[0].decl.NrTokens = 3;
1710 out[0].decl.File = TGSI_FILE_SAMPLER_VIEW;
1711 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1712
1713 out[1].value = 0;
1714 out[1].decl_range.First = index;
1715 out[1].decl_range.Last = index;
1716
1717 out[2].value = 0;
1718 out[2].decl_sampler_view.Resource = target;
1719 out[2].decl_sampler_view.ReturnTypeX = return_type_x;
1720 out[2].decl_sampler_view.ReturnTypeY = return_type_y;
1721 out[2].decl_sampler_view.ReturnTypeZ = return_type_z;
1722 out[2].decl_sampler_view.ReturnTypeW = return_type_w;
1723 }
1724
1725 static void
emit_decl_image(struct ureg_program * ureg,unsigned index,enum tgsi_texture_type target,enum pipe_format format,boolean wr,boolean raw)1726 emit_decl_image(struct ureg_program *ureg,
1727 unsigned index,
1728 enum tgsi_texture_type target,
1729 enum pipe_format format,
1730 boolean wr,
1731 boolean raw)
1732 {
1733 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 3);
1734
1735 out[0].value = 0;
1736 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1737 out[0].decl.NrTokens = 3;
1738 out[0].decl.File = TGSI_FILE_IMAGE;
1739 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1740
1741 out[1].value = 0;
1742 out[1].decl_range.First = index;
1743 out[1].decl_range.Last = index;
1744
1745 out[2].value = 0;
1746 out[2].decl_image.Resource = target;
1747 out[2].decl_image.Writable = wr;
1748 out[2].decl_image.Raw = raw;
1749 out[2].decl_image.Format = format;
1750 }
1751
1752 static void
emit_decl_buffer(struct ureg_program * ureg,unsigned index,bool atomic)1753 emit_decl_buffer(struct ureg_program *ureg,
1754 unsigned index,
1755 bool atomic)
1756 {
1757 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1758
1759 out[0].value = 0;
1760 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1761 out[0].decl.NrTokens = 2;
1762 out[0].decl.File = TGSI_FILE_BUFFER;
1763 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1764 out[0].decl.Atomic = atomic;
1765
1766 out[1].value = 0;
1767 out[1].decl_range.First = index;
1768 out[1].decl_range.Last = index;
1769 }
1770
1771 static void
emit_decl_memory(struct ureg_program * ureg,unsigned memory_type)1772 emit_decl_memory(struct ureg_program *ureg, unsigned memory_type)
1773 {
1774 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1775
1776 out[0].value = 0;
1777 out[0].decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
1778 out[0].decl.NrTokens = 2;
1779 out[0].decl.File = TGSI_FILE_MEMORY;
1780 out[0].decl.UsageMask = TGSI_WRITEMASK_XYZW;
1781 out[0].decl.MemType = memory_type;
1782
1783 out[1].value = 0;
1784 out[1].decl_range.First = memory_type;
1785 out[1].decl_range.Last = memory_type;
1786 }
1787
1788 static void
emit_immediate(struct ureg_program * ureg,const unsigned * v,unsigned type)1789 emit_immediate( struct ureg_program *ureg,
1790 const unsigned *v,
1791 unsigned type )
1792 {
1793 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 5 );
1794
1795 out[0].value = 0;
1796 out[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
1797 out[0].imm.NrTokens = 5;
1798 out[0].imm.DataType = type;
1799 out[0].imm.Padding = 0;
1800
1801 out[1].imm_data.Uint = v[0];
1802 out[2].imm_data.Uint = v[1];
1803 out[3].imm_data.Uint = v[2];
1804 out[4].imm_data.Uint = v[3];
1805 }
1806
1807 static void
emit_property(struct ureg_program * ureg,unsigned name,unsigned data)1808 emit_property(struct ureg_program *ureg,
1809 unsigned name,
1810 unsigned data)
1811 {
1812 union tgsi_any_token *out = get_tokens(ureg, DOMAIN_DECL, 2);
1813
1814 out[0].value = 0;
1815 out[0].prop.Type = TGSI_TOKEN_TYPE_PROPERTY;
1816 out[0].prop.NrTokens = 2;
1817 out[0].prop.PropertyName = name;
1818
1819 out[1].prop_data.Data = data;
1820 }
1821
1822 static int
input_sort(const void * in_a,const void * in_b)1823 input_sort(const void *in_a, const void *in_b)
1824 {
1825 const struct ureg_input_decl *a = in_a, *b = in_b;
1826
1827 return a->first - b->first;
1828 }
1829
1830 static int
output_sort(const void * in_a,const void * in_b)1831 output_sort(const void *in_a, const void *in_b)
1832 {
1833 const struct ureg_output_decl *a = in_a, *b = in_b;
1834
1835 return a->first - b->first;
1836 }
1837
1838 static int
atomic_decl_range_sort(const void * in_a,const void * in_b)1839 atomic_decl_range_sort(const void *in_a, const void *in_b)
1840 {
1841 const struct hw_atomic_decl_range *a = in_a, *b = in_b;
1842
1843 return a->first - b->first;
1844 }
1845
/* Convert all accumulated declaration state (properties, inputs,
 * outputs, samplers, images, buffers, constants, atomics, temporaries,
 * immediates) into TGSI declaration tokens in DOMAIN_DECL.
 */
static void emit_decls( struct ureg_program *ureg )
{
   unsigned i,j;

   /* Shader properties first; ~0 marks an unset property slot. */
   for (i = 0; i < ARRAY_SIZE(ureg->properties); i++)
      if (ureg->properties[i] != ~0u)
         emit_property(ureg, i, ureg->properties[i]);

   /* While not required by TGSI spec, virglrenderer has a dependency on the
    * inputs being sorted.
    */
   qsort(ureg->input, ureg->nr_inputs, sizeof(ureg->input[0]), input_sort);

   if (ureg->processor == PIPE_SHADER_VERTEX) {
      /* VS inputs are tracked as a bitmask of used attribute slots. */
      for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
         if (ureg->vs_inputs[i/32] & (1u << (i%32))) {
            emit_decl_range( ureg, TGSI_FILE_INPUT, i, 1 );
         }
      }
   } else if (ureg->processor == PIPE_SHADER_FRAGMENT) {
      /* FS inputs carry interpolation info.  When the driver can't take
       * multi-register declaration ranges, split each range into
       * per-register declarations with consecutive semantic indices.
       */
      if (ureg->supports_any_inout_decl_range) {
         for (i = 0; i < ureg->nr_inputs; i++) {
            emit_decl_fs(ureg,
                         TGSI_FILE_INPUT,
                         ureg->input[i].first,
                         ureg->input[i].last,
                         ureg->input[i].semantic_name,
                         ureg->input[i].semantic_index,
                         ureg->input[i].interp,
                         ureg->input[i].interp_location,
                         ureg->input[i].array_id,
                         ureg->input[i].usage_mask);
         }
      }
      else {
         for (i = 0; i < ureg->nr_inputs; i++) {
            for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
               emit_decl_fs(ureg,
                            TGSI_FILE_INPUT,
                            j, j,
                            ureg->input[i].semantic_name,
                            ureg->input[i].semantic_index +
                            (j - ureg->input[i].first),
                            ureg->input[i].interp,
                            ureg->input[i].interp_location, 0,
                            ureg->input[i].usage_mask);
            }
         }
      }
   } else {
      /* All other stages: semantic declarations without interpolation. */
      if (ureg->supports_any_inout_decl_range) {
         for (i = 0; i < ureg->nr_inputs; i++) {
            emit_decl_semantic(ureg,
                               TGSI_FILE_INPUT,
                               ureg->input[i].first,
                               ureg->input[i].last,
                               ureg->input[i].semantic_name,
                               ureg->input[i].semantic_index,
                               0,
                               TGSI_WRITEMASK_XYZW,
                               ureg->input[i].array_id,
                               FALSE);
         }
      }
      else {
         for (i = 0; i < ureg->nr_inputs; i++) {
            for (j = ureg->input[i].first; j <= ureg->input[i].last; j++) {
               emit_decl_semantic(ureg,
                                  TGSI_FILE_INPUT,
                                  j, j,
                                  ureg->input[i].semantic_name,
                                  ureg->input[i].semantic_index +
                                  (j - ureg->input[i].first),
                                  0,
                                  TGSI_WRITEMASK_XYZW, 0, FALSE);
            }
         }
      }
   }

   /* System values: one declaration per value, index == register. */
   for (i = 0; i < ureg->nr_system_values; i++) {
      emit_decl_semantic(ureg,
                         TGSI_FILE_SYSTEM_VALUE,
                         i,
                         i,
                         ureg->system_value[i].semantic_name,
                         ureg->system_value[i].semantic_index,
                         0,
                         TGSI_WRITEMASK_XYZW, 0, FALSE);
   }

   /* While not required by TGSI spec, virglrenderer has a dependency on the
    * outputs being sorted.
    */
   qsort(ureg->output, ureg->nr_outputs, sizeof(ureg->output[0]), output_sort);

   /* Outputs: same range-vs-per-register split as for inputs. */
   if (ureg->supports_any_inout_decl_range) {
      for (i = 0; i < ureg->nr_outputs; i++) {
         emit_decl_semantic(ureg,
                            TGSI_FILE_OUTPUT,
                            ureg->output[i].first,
                            ureg->output[i].last,
                            ureg->output[i].semantic_name,
                            ureg->output[i].semantic_index,
                            ureg->output[i].streams,
                            ureg->output[i].usage_mask,
                            ureg->output[i].array_id,
                            ureg->output[i].invariant);
      }
   }
   else {
      for (i = 0; i < ureg->nr_outputs; i++) {
         for (j = ureg->output[i].first; j <= ureg->output[i].last; j++) {
            emit_decl_semantic(ureg,
                               TGSI_FILE_OUTPUT,
                               j, j,
                               ureg->output[i].semantic_name,
                               ureg->output[i].semantic_index +
                               (j - ureg->output[i].first),
                               ureg->output[i].streams,
                               ureg->output[i].usage_mask,
                               0,
                               ureg->output[i].invariant);
         }
      }
   }

   for (i = 0; i < ureg->nr_samplers; i++) {
      emit_decl_range( ureg,
                       TGSI_FILE_SAMPLER,
                       ureg->sampler[i].Index, 1 );
   }

   for (i = 0; i < ureg->nr_sampler_views; i++) {
      emit_decl_sampler_view(ureg,
                             ureg->sampler_view[i].index,
                             ureg->sampler_view[i].target,
                             ureg->sampler_view[i].return_type_x,
                             ureg->sampler_view[i].return_type_y,
                             ureg->sampler_view[i].return_type_z,
                             ureg->sampler_view[i].return_type_w);
   }

   for (i = 0; i < ureg->nr_images; i++) {
      emit_decl_image(ureg,
                      ureg->image[i].index,
                      ureg->image[i].target,
                      ureg->image[i].format,
                      ureg->image[i].wr,
                      ureg->image[i].raw);
   }

   for (i = 0; i < ureg->nr_buffers; i++) {
      emit_decl_buffer(ureg, ureg->buffer[i].index, ureg->buffer[i].atomic);
   }

   /* One declaration per memory type actually referenced. */
   for (i = 0; i < TGSI_MEMORY_TYPE_COUNT; i++) {
      if (ureg->use_memory[i])
         emit_decl_memory(ureg, i);
   }

   /* Constants: one 2-D range per used range of each constant buffer. */
   for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
      struct const_decl *decl = &ureg->const_decls[i];

      if (decl->nr_constant_ranges) {
         uint j;

         for (j = 0; j < decl->nr_constant_ranges; j++) {
            emit_decl_range2D(ureg,
                              TGSI_FILE_CONSTANT,
                              decl->constant_range[j].first,
                              decl->constant_range[j].last,
                              i);
         }
      }
   }

   for (i = 0; i < PIPE_MAX_HW_ATOMIC_BUFFERS; i++) {
      struct hw_atomic_decl *decl = &ureg->hw_atomic_decls[i];

      if (decl->nr_hw_atomic_ranges) {
         uint j;

         /* GLSL-to-TGSI generated HW atomic counters in order, and r600 depends
          * on it.
          */
         qsort(decl->hw_atomic_range, decl->nr_hw_atomic_ranges, sizeof(struct hw_atomic_decl_range), atomic_decl_range_sort);

         for (j = 0; j < decl->nr_hw_atomic_ranges; j++) {
            emit_decl_atomic_2d(ureg,
                                decl->hw_atomic_range[j].first,
                                decl->hw_atomic_range[j].last,
                                i,
                                decl->hw_atomic_range[j].array_id);
         }
      }
   }

   /* Temporaries: walk the decl_temps bitmask to find range boundaries,
    * pairing ranges that start at an array_temps entry with an ArrayID.
    */
   if (ureg->nr_temps) {
      unsigned array = 0;
      for (i = 0; i < ureg->nr_temps;) {
         boolean local = util_bitmask_get(ureg->local_temps, i);
         unsigned first = i;
         i = util_bitmask_get_next_index(ureg->decl_temps, i + 1);
         if (i == UTIL_BITMASK_INVALID_INDEX)
            i = ureg->nr_temps;

         if (array < ureg->nr_array_temps && ureg->array_temps[array] == first)
            emit_decl_temps( ureg, first, i - 1, local, ++array );
         else
            emit_decl_temps( ureg, first, i - 1, local, 0 );
      }
   }

   if (ureg->nr_addrs) {
      emit_decl_range( ureg,
                       TGSI_FILE_ADDRESS,
                       0, ureg->nr_addrs );
   }

   for (i = 0; i < ureg->nr_immediates; i++) {
      emit_immediate( ureg,
                      ureg->immediate[i].value.u,
                      ureg->immediate[i].type );
   }
}
2072
2073 /* Append the instruction tokens onto the declarations to build a
2074 * contiguous stream suitable to send to the driver.
2075 */
copy_instructions(struct ureg_program * ureg)2076 static void copy_instructions( struct ureg_program *ureg )
2077 {
2078 unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
2079 union tgsi_any_token *out = get_tokens( ureg,
2080 DOMAIN_DECL,
2081 nr_tokens );
2082
2083 memcpy(out,
2084 ureg->domain[DOMAIN_INSN].tokens,
2085 nr_tokens * sizeof out[0] );
2086 }
2087
2088
2089 static void
fixup_header_size(struct ureg_program * ureg)2090 fixup_header_size(struct ureg_program *ureg)
2091 {
2092 union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 0 );
2093
2094 out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 2;
2095 }
2096
2097
2098 static void
emit_header(struct ureg_program * ureg)2099 emit_header( struct ureg_program *ureg )
2100 {
2101 union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 2 );
2102
2103 out[0].header.HeaderSize = 2;
2104 out[0].header.BodySize = 0;
2105
2106 out[1].processor.Processor = ureg->processor;
2107 out[1].processor.Padding = 0;
2108 }
2109
2110
/* Assemble the final, contiguous TGSI token stream for the program.
 * Returns a pointer into ureg's own storage (ownership stays with ureg),
 * or NULL if token allocation failed during construction.
 */
const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
{
   const struct tgsi_token *tokens;

   /* Stages that can feed another stage record which one; default to
    * fragment when the caller never specified a successor.
    */
   switch (ureg->processor) {
   case PIPE_SHADER_VERTEX:
   case PIPE_SHADER_TESS_EVAL:
      ureg_property(ureg, TGSI_PROPERTY_NEXT_SHADER,
                    ureg->next_shader_processor == -1 ?
                    PIPE_SHADER_FRAGMENT :
                    ureg->next_shader_processor);
      break;
   default:
      ; /* nothing */
   }

   /* Order matters: header first, then declarations, then the copied
    * instruction tokens, and finally the header's BodySize patch-up.
    */
   emit_header( ureg );
   emit_decls( ureg );
   copy_instructions( ureg );
   fixup_header_size( ureg );

   /* A failed token allocation leaves the static error_tokens buffer in
    * place of real storage; treat that as a hard error.
    */
   if (ureg->domain[0].tokens == error_tokens ||
       ureg->domain[1].tokens == error_tokens) {
      debug_printf("%s: error in generated shader\n", __FUNCTION__);
      assert(0);
      return NULL;
   }

   tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;

   /* Flip to 1 for ad-hoc debugging of the emitted stream. */
   if (0) {
      debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
                   ureg->domain[DOMAIN_DECL].count);
      tgsi_dump( tokens, 0 );
   }

#if DEBUG
   /* tgsi_sanity doesn't seem to return if there are too many constants. */
   bool too_many_constants = false;
   for (unsigned i = 0; i < ARRAY_SIZE(ureg->const_decls); i++) {
      for (unsigned j = 0; j < ureg->const_decls[i].nr_constant_ranges; j++) {
         if (ureg->const_decls[i].constant_range[j].last > 4096) {
            too_many_constants = true;
            break;
         }
      }
   }

   if (tokens && !too_many_constants && !tgsi_sanity_check(tokens)) {
      debug_printf("tgsi_ureg.c, sanity check failed on generated tokens:\n");
      tgsi_dump(tokens, 0);
      assert(0);
   }
#endif


   return tokens;
}
2169
2170
ureg_create_shader(struct ureg_program * ureg,struct pipe_context * pipe,const struct pipe_stream_output_info * so)2171 void *ureg_create_shader( struct ureg_program *ureg,
2172 struct pipe_context *pipe,
2173 const struct pipe_stream_output_info *so )
2174 {
2175 struct pipe_shader_state state = {0};
2176
2177 pipe_shader_state_from_tgsi(&state, ureg_finalize(ureg));
2178 if(!state.tokens)
2179 return NULL;
2180
2181 if (so)
2182 state.stream_output = *so;
2183
2184 switch (ureg->processor) {
2185 case PIPE_SHADER_VERTEX:
2186 return pipe->create_vs_state(pipe, &state);
2187 case PIPE_SHADER_TESS_CTRL:
2188 return pipe->create_tcs_state(pipe, &state);
2189 case PIPE_SHADER_TESS_EVAL:
2190 return pipe->create_tes_state(pipe, &state);
2191 case PIPE_SHADER_GEOMETRY:
2192 return pipe->create_gs_state(pipe, &state);
2193 case PIPE_SHADER_FRAGMENT:
2194 return pipe->create_fs_state(pipe, &state);
2195 default:
2196 return NULL;
2197 }
2198 }
2199
2200
ureg_get_tokens(struct ureg_program * ureg,unsigned * nr_tokens)2201 const struct tgsi_token *ureg_get_tokens( struct ureg_program *ureg,
2202 unsigned *nr_tokens )
2203 {
2204 const struct tgsi_token *tokens;
2205
2206 ureg_finalize(ureg);
2207
2208 tokens = &ureg->domain[DOMAIN_DECL].tokens[0].token;
2209
2210 if (nr_tokens)
2211 *nr_tokens = ureg->domain[DOMAIN_DECL].count;
2212
2213 ureg->domain[DOMAIN_DECL].tokens = NULL;
2214 ureg->domain[DOMAIN_DECL].size = 0;
2215 ureg->domain[DOMAIN_DECL].order = 0;
2216 ureg->domain[DOMAIN_DECL].count = 0;
2217
2218 return tokens;
2219 }
2220
2221
/* Release a token array previously handed out by ureg_get_tokens().
 * The const qualifier is cast away because the array is const only from
 * the consumer's point of view; this module allocated it with MALLOC.
 */
void ureg_free_tokens( const struct tgsi_token *tokens )
{
   FREE((struct tgsi_token *)tokens);
}
2226
2227
2228 struct ureg_program *
ureg_create(enum pipe_shader_type processor)2229 ureg_create(enum pipe_shader_type processor)
2230 {
2231 return ureg_create_with_screen(processor, NULL);
2232 }
2233
2234
2235 struct ureg_program *
ureg_create_with_screen(enum pipe_shader_type processor,struct pipe_screen * screen)2236 ureg_create_with_screen(enum pipe_shader_type processor,
2237 struct pipe_screen *screen)
2238 {
2239 uint i;
2240 struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
2241 if (!ureg)
2242 goto no_ureg;
2243
2244 ureg->processor = processor;
2245 ureg->supports_any_inout_decl_range =
2246 screen &&
2247 screen->get_shader_param(screen, processor,
2248 PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE) != 0;
2249 ureg->next_shader_processor = -1;
2250
2251 for (i = 0; i < ARRAY_SIZE(ureg->properties); i++)
2252 ureg->properties[i] = ~0;
2253
2254 ureg->free_temps = util_bitmask_create();
2255 if (ureg->free_temps == NULL)
2256 goto no_free_temps;
2257
2258 ureg->local_temps = util_bitmask_create();
2259 if (ureg->local_temps == NULL)
2260 goto no_local_temps;
2261
2262 ureg->decl_temps = util_bitmask_create();
2263 if (ureg->decl_temps == NULL)
2264 goto no_decl_temps;
2265
2266 return ureg;
2267
2268 no_decl_temps:
2269 util_bitmask_destroy(ureg->local_temps);
2270 no_local_temps:
2271 util_bitmask_destroy(ureg->free_temps);
2272 no_free_temps:
2273 FREE(ureg);
2274 no_ureg:
2275 return NULL;
2276 }
2277
2278
2279 void
ureg_set_next_shader_processor(struct ureg_program * ureg,unsigned processor)2280 ureg_set_next_shader_processor(struct ureg_program *ureg, unsigned processor)
2281 {
2282 ureg->next_shader_processor = processor;
2283 }
2284
2285
2286 unsigned
ureg_get_nr_outputs(const struct ureg_program * ureg)2287 ureg_get_nr_outputs( const struct ureg_program *ureg )
2288 {
2289 if (!ureg)
2290 return 0;
2291 return ureg->nr_outputs;
2292 }
2293
2294 static void
ureg_setup_clipdist_info(struct ureg_program * ureg,const struct shader_info * info)2295 ureg_setup_clipdist_info(struct ureg_program *ureg,
2296 const struct shader_info *info)
2297 {
2298 if (info->clip_distance_array_size)
2299 ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
2300 info->clip_distance_array_size);
2301 if (info->cull_distance_array_size)
2302 ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2303 info->cull_distance_array_size);
2304 }
2305
2306 static void
ureg_setup_tess_ctrl_shader(struct ureg_program * ureg,const struct shader_info * info)2307 ureg_setup_tess_ctrl_shader(struct ureg_program *ureg,
2308 const struct shader_info *info)
2309 {
2310 ureg_property(ureg, TGSI_PROPERTY_TCS_VERTICES_OUT,
2311 info->tess.tcs_vertices_out);
2312 }
2313
2314 static void
ureg_setup_tess_eval_shader(struct ureg_program * ureg,const struct shader_info * info)2315 ureg_setup_tess_eval_shader(struct ureg_program *ureg,
2316 const struct shader_info *info)
2317 {
2318 ureg_property(ureg, TGSI_PROPERTY_TES_PRIM_MODE, u_tess_prim_from_shader(info->tess._primitive_mode));
2319
2320 STATIC_ASSERT((TESS_SPACING_EQUAL + 1) % 3 == PIPE_TESS_SPACING_EQUAL);
2321 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_ODD + 1) % 3 ==
2322 PIPE_TESS_SPACING_FRACTIONAL_ODD);
2323 STATIC_ASSERT((TESS_SPACING_FRACTIONAL_EVEN + 1) % 3 ==
2324 PIPE_TESS_SPACING_FRACTIONAL_EVEN);
2325
2326 ureg_property(ureg, TGSI_PROPERTY_TES_SPACING,
2327 (info->tess.spacing + 1) % 3);
2328
2329 ureg_property(ureg, TGSI_PROPERTY_TES_VERTEX_ORDER_CW,
2330 !info->tess.ccw);
2331 ureg_property(ureg, TGSI_PROPERTY_TES_POINT_MODE,
2332 info->tess.point_mode);
2333 }
2334
2335 static void
ureg_setup_geometry_shader(struct ureg_program * ureg,const struct shader_info * info)2336 ureg_setup_geometry_shader(struct ureg_program *ureg,
2337 const struct shader_info *info)
2338 {
2339 ureg_property(ureg, TGSI_PROPERTY_GS_INPUT_PRIM,
2340 info->gs.input_primitive);
2341 ureg_property(ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM,
2342 info->gs.output_primitive);
2343 ureg_property(ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
2344 info->gs.vertices_out);
2345 ureg_property(ureg, TGSI_PROPERTY_GS_INVOCATIONS,
2346 info->gs.invocations);
2347 }
2348
2349 static void
ureg_setup_fragment_shader(struct ureg_program * ureg,const struct shader_info * info)2350 ureg_setup_fragment_shader(struct ureg_program *ureg,
2351 const struct shader_info *info)
2352 {
2353 if (info->fs.early_fragment_tests || info->fs.post_depth_coverage) {
2354 ureg_property(ureg, TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL, 1);
2355
2356 if (info->fs.post_depth_coverage)
2357 ureg_property(ureg, TGSI_PROPERTY_FS_POST_DEPTH_COVERAGE, 1);
2358 }
2359
2360 if (info->fs.depth_layout != FRAG_DEPTH_LAYOUT_NONE) {
2361 switch (info->fs.depth_layout) {
2362 case FRAG_DEPTH_LAYOUT_ANY:
2363 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
2364 TGSI_FS_DEPTH_LAYOUT_ANY);
2365 break;
2366 case FRAG_DEPTH_LAYOUT_GREATER:
2367 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
2368 TGSI_FS_DEPTH_LAYOUT_GREATER);
2369 break;
2370 case FRAG_DEPTH_LAYOUT_LESS:
2371 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
2372 TGSI_FS_DEPTH_LAYOUT_LESS);
2373 break;
2374 case FRAG_DEPTH_LAYOUT_UNCHANGED:
2375 ureg_property(ureg, TGSI_PROPERTY_FS_DEPTH_LAYOUT,
2376 TGSI_FS_DEPTH_LAYOUT_UNCHANGED);
2377 break;
2378 default:
2379 assert(0);
2380 }
2381 }
2382
2383 if (info->fs.advanced_blend_modes) {
2384 ureg_property(ureg, TGSI_PROPERTY_FS_BLEND_EQUATION_ADVANCED,
2385 info->fs.advanced_blend_modes);
2386 }
2387 }
2388
2389 static void
ureg_setup_compute_shader(struct ureg_program * ureg,const struct shader_info * info)2390 ureg_setup_compute_shader(struct ureg_program *ureg,
2391 const struct shader_info *info)
2392 {
2393 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH,
2394 info->workgroup_size[0]);
2395 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT,
2396 info->workgroup_size[1]);
2397 ureg_property(ureg, TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH,
2398 info->workgroup_size[2]);
2399
2400 if (info->shared_size)
2401 ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED);
2402 }
2403
2404 void
ureg_setup_shader_info(struct ureg_program * ureg,const struct shader_info * info)2405 ureg_setup_shader_info(struct ureg_program *ureg,
2406 const struct shader_info *info)
2407 {
2408 if (info->layer_viewport_relative)
2409 ureg_property(ureg, TGSI_PROPERTY_LAYER_VIEWPORT_RELATIVE, 1);
2410
2411 switch (info->stage) {
2412 case MESA_SHADER_VERTEX:
2413 ureg_setup_clipdist_info(ureg, info);
2414 ureg_set_next_shader_processor(ureg, pipe_shader_type_from_mesa(info->next_stage));
2415 break;
2416 case MESA_SHADER_TESS_CTRL:
2417 ureg_setup_tess_ctrl_shader(ureg, info);
2418 break;
2419 case MESA_SHADER_TESS_EVAL:
2420 ureg_setup_tess_eval_shader(ureg, info);
2421 ureg_setup_clipdist_info(ureg, info);
2422 ureg_set_next_shader_processor(ureg, pipe_shader_type_from_mesa(info->next_stage));
2423 break;
2424 case MESA_SHADER_GEOMETRY:
2425 ureg_setup_geometry_shader(ureg, info);
2426 ureg_setup_clipdist_info(ureg, info);
2427 break;
2428 case MESA_SHADER_FRAGMENT:
2429 ureg_setup_fragment_shader(ureg, info);
2430 break;
2431 case MESA_SHADER_COMPUTE:
2432 ureg_setup_compute_shader(ureg, info);
2433 break;
2434 default:
2435 break;
2436 }
2437 }
2438
2439
ureg_destroy(struct ureg_program * ureg)2440 void ureg_destroy( struct ureg_program *ureg )
2441 {
2442 unsigned i;
2443
2444 for (i = 0; i < ARRAY_SIZE(ureg->domain); i++) {
2445 if (ureg->domain[i].tokens &&
2446 ureg->domain[i].tokens != error_tokens)
2447 FREE(ureg->domain[i].tokens);
2448 }
2449
2450 util_bitmask_destroy(ureg->free_temps);
2451 util_bitmask_destroy(ureg->local_temps);
2452 util_bitmask_destroy(ureg->decl_temps);
2453
2454 FREE(ureg);
2455 }
2456
/* Toggle precise mode for subsequently emitted instructions. */
void ureg_set_precise( struct ureg_program *ureg, bool precise )
{
   ureg->precise = precise;
}
2461