1 /*
2 * Copyright © 2019 Google, Inc
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /**
25 * \file lower_precision.cpp
26 */
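/* Rough overview of the pass (a summary of the visitors below):
 *
 *  - find_lowerable_rvalues_visitor collects the topmost rvalues whose
 *    precision qualifiers (mediump/lowp) allow them to be lowered,
 *  - find_precision_visitor and lower_precision_visitor rewrite those
 *    rvalues to 16-bit types, add the up/down conversions at their
 *    boundaries and clone lowered versions of builtins where needed,
 *  - lower_variables_visitor lowers the types of mediump/lowp temporaries
 *    (and, optionally, uniforms) so that values passed between blocks
 *    don't bounce back to 32 bits.
 */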
27
28 #include "main/macros.h"
29 #include "main/consts_exts.h"
30 #include "compiler/glsl_types.h"
31 #include "ir.h"
32 #include "ir_builder.h"
33 #include "ir_optimization.h"
34 #include "ir_rvalue_visitor.h"
35 #include "util/half_float.h"
36 #include "util/set.h"
37 #include "util/hash_table.h"
38 #include <vector>
39
40 namespace {
41
42 class find_precision_visitor : public ir_rvalue_enter_visitor {
43 public:
44 find_precision_visitor(const struct gl_shader_compiler_options *options);
45 ~find_precision_visitor();
46
47 virtual void handle_rvalue(ir_rvalue **rvalue);
48 virtual ir_visitor_status visit_enter(ir_call *ir);
49
50 ir_function_signature *map_builtin(ir_function_signature *sig);
51
52 /* Set of rvalues that can be lowered. This will be filled in by
53 * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
54 * will be added to this set.
55 */
56 struct set *lowerable_rvalues;
57
58 /**
59 * A mapping of builtin signature functions to lowered versions. This is
60 * filled in lazily when a lowered version is needed.
61 */
62 struct hash_table *lowered_builtins;
63 /**
64 * A temporary hash table only used in order to clone functions.
65 */
66 struct hash_table *clone_ht;
67
68 void *lowered_builtin_mem_ctx;
69
70 const struct gl_shader_compiler_options *options;
71 };
72
73 class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
74 public:
75 enum can_lower_state {
76 UNKNOWN,
77 CANT_LOWER,
78 SHOULD_LOWER,
79 };
80
81 enum parent_relation {
82 /* The parent performs a further operation involving the result from the
83 * child and can be lowered along with it.
84 */
85 COMBINED_OPERATION,
86 /* The parent instruction’s operation is independent of the child type so
87 * the child should be lowered separately.
88 */
89 INDEPENDENT_OPERATION,
90 };
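   /* Illustrative example: in an expression such as "a + b" the
    * ir_expression is a COMBINED_OPERATION parent of its operands, so the
    * whole addition is lowered (or not) as a unit. In "arr[i]" the array
    * dereference is an INDEPENDENT_OPERATION parent of the index "i", so the
    * index is considered on its own.
    */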
91
92 struct stack_entry {
93 ir_instruction *instr;
94 enum can_lower_state state;
95 /* List of child rvalues that can be lowered. When this stack entry is
96 * popped, if this node itself can’t be lowered then all of the children
97 * are root nodes to lower, so we will add them to lowerable_rvalues.
98 * Otherwise if this node can also be lowered then we won’t add the
99 * children because we only want to add the topmost lowerable nodes to
100 * lowerable_rvalues and the children will be lowered as part of lowering
101 * this node.
102 */
103 std::vector<ir_instruction *> lowerable_children;
104 };
105
106 find_lowerable_rvalues_visitor(struct set *result,
107 const struct gl_shader_compiler_options *options);
108
109 static void stack_enter(class ir_instruction *ir, void *data);
110 static void stack_leave(class ir_instruction *ir, void *data);
111
112 virtual ir_visitor_status visit(ir_constant *ir);
113 virtual ir_visitor_status visit(ir_dereference_variable *ir);
114
115 virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
116 virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
117 virtual ir_visitor_status visit_enter(ir_texture *ir);
118 virtual ir_visitor_status visit_enter(ir_expression *ir);
119
120 virtual ir_visitor_status visit_leave(ir_assignment *ir);
121 virtual ir_visitor_status visit_leave(ir_call *ir);
122
123 can_lower_state handle_precision(const glsl_type *type,
124 int precision) const;
125
126 static parent_relation get_parent_relation(ir_instruction *parent,
127 ir_instruction *child);
128
129 std::vector<stack_entry> stack;
130 struct set *lowerable_rvalues;
131 const struct gl_shader_compiler_options *options;
132
133 void pop_stack_entry();
134 void add_lowerable_children(const stack_entry &entry);
135 };
136
137 class lower_precision_visitor : public ir_rvalue_visitor {
138 public:
139 virtual void handle_rvalue(ir_rvalue **rvalue);
140 virtual ir_visitor_status visit_enter(ir_dereference_array *);
141 virtual ir_visitor_status visit_enter(ir_dereference_record *);
142 virtual ir_visitor_status visit_enter(ir_call *ir);
143 virtual ir_visitor_status visit_enter(ir_texture *ir);
144 virtual ir_visitor_status visit_leave(ir_expression *);
145 };
146
147 static bool
148 can_lower_type(const struct gl_shader_compiler_options *options,
149 const glsl_type *type)
150 {
151 /* Don’t lower any expressions involving non-float types except bool and
152 * texture samplers. This will rule out operations that change the type such
153 * as conversion to ints. Instead it will end up lowering the arguments
154 * and adding a final conversion to float32. We want to handle boolean
155 * types so that comparisons are done in 16-bit.
156 */
157
158 switch (glsl_without_array(type)->base_type) {
159 /* TODO: should we do anything for these two with regard to Int16 vs FP16
160 * support?
161 */
162 case GLSL_TYPE_BOOL:
163 case GLSL_TYPE_SAMPLER:
164 case GLSL_TYPE_IMAGE:
165 return true;
166
167 case GLSL_TYPE_FLOAT:
168 return options->LowerPrecisionFloat16;
169
170 case GLSL_TYPE_UINT:
171 case GLSL_TYPE_INT:
172 return options->LowerPrecisionInt16;
173
174 default:
175 return false;
176 }
177 }
178
179 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
180 const struct gl_shader_compiler_options *opts)
181 {
182 lowerable_rvalues = res;
183 options = opts;
184 callback_enter = stack_enter;
185 callback_leave = stack_leave;
186 data_enter = this;
187 data_leave = this;
188 }
189
190 void
191 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
192 void *data)
193 {
194 find_lowerable_rvalues_visitor *state =
195 (find_lowerable_rvalues_visitor *) data;
196
197 /* Add a new stack entry for this instruction */
198 stack_entry entry;
199
200 entry.instr = ir;
201 entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
202
203 state->stack.push_back(entry);
204 }
205
206 void
207 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
208 {
209 /* We can’t lower this node so if there were any pending children then they
210 * are all root lowerable nodes and we should add them to the set.
211 */
212 for (auto &it : entry.lowerable_children)
213 _mesa_set_add(lowerable_rvalues, it);
214 }
215
216 void
217 find_lowerable_rvalues_visitor::pop_stack_entry()
218 {
219 const stack_entry &entry = stack.back();
220
221 if (stack.size() >= 2) {
222 /* Combine this state into the parent state, unless the parent operation
223 * doesn’t have any relation to the child operations
224 */
225 stack_entry &parent = stack.end()[-2];
226 parent_relation rel = get_parent_relation(parent.instr, entry.instr);
227
228 if (rel == COMBINED_OPERATION) {
229 switch (entry.state) {
230 case CANT_LOWER:
231 parent.state = CANT_LOWER;
232 break;
233 case SHOULD_LOWER:
234 if (parent.state == UNKNOWN)
235 parent.state = SHOULD_LOWER;
236 break;
237 case UNKNOWN:
238 break;
239 }
240 }
241 }
242
243 if (entry.state == SHOULD_LOWER) {
244 ir_rvalue *rv = entry.instr->as_rvalue();
245
246 if (rv == NULL) {
247 add_lowerable_children(entry);
248 } else if (stack.size() >= 2) {
249 stack_entry &parent = stack.end()[-2];
250
251 switch (get_parent_relation(parent.instr, rv)) {
252 case COMBINED_OPERATION:
253 /* We only want to add the toplevel lowerable instructions to the
254 * lowerable set. Therefore if there is a parent then instead of
255 * adding this instruction to the set we will queue it on the parent and
256 * decide depending on the result of the parent instruction.
257 */
258 parent.lowerable_children.push_back(entry.instr);
259 break;
260 case INDEPENDENT_OPERATION:
261 _mesa_set_add(lowerable_rvalues, rv);
262 break;
263 }
264 } else {
265 /* This is a toplevel node so add it directly to the lowerable
266 * set.
267 */
268 _mesa_set_add(lowerable_rvalues, rv);
269 }
270 } else if (entry.state == CANT_LOWER) {
271 add_lowerable_children(entry);
272 }
273
274 stack.pop_back();
275 }
276
277 void
278 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
279 void *data)
280 {
281 find_lowerable_rvalues_visitor *state =
282 (find_lowerable_rvalues_visitor *) data;
283
284 state->pop_stack_entry();
285 }
286
287 enum find_lowerable_rvalues_visitor::can_lower_state
288 find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
289 int precision) const
290 {
291 if (!can_lower_type(options, type))
292 return CANT_LOWER;
293
294 switch (precision) {
295 case GLSL_PRECISION_NONE:
296 return UNKNOWN;
297 case GLSL_PRECISION_HIGH:
298 return CANT_LOWER;
299 case GLSL_PRECISION_MEDIUM:
300 case GLSL_PRECISION_LOW:
301 return SHOULD_LOWER;
302 }
303
304 return CANT_LOWER;
305 }
306
307 enum find_lowerable_rvalues_visitor::parent_relation
308 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
309 ir_instruction *child)
310 {
311 /* If the parent is a dereference instruction then the only child could be
312 * for example an array dereference and that should be lowered independently
313 * of the parent.
314 */
315 if (parent->as_dereference())
316 return INDEPENDENT_OPERATION;
317
318 /* The precision of texture sampling depends on the precision of the sampler.
319 * The rest of the arguments don’t matter so we can treat it as an
320 * independent operation.
321 */
322 if (parent->as_texture())
323 return INDEPENDENT_OPERATION;
324
325 return COMBINED_OPERATION;
326 }
327
328 ir_visitor_status
329 find_lowerable_rvalues_visitor::visit(ir_constant *ir)
330 {
331 stack_enter(ir, this);
332
333 if (!can_lower_type(options, ir->type))
334 stack.back().state = CANT_LOWER;
335
336 stack_leave(ir, this);
337
338 return visit_continue;
339 }
340
341 ir_visitor_status
342 find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
343 {
344 stack_enter(ir, this);
345
346 if (stack.back().state == UNKNOWN)
347 stack.back().state = handle_precision(ir->type, ir->precision());
348
349 stack_leave(ir, this);
350
351 return visit_continue;
352 }
353
354 ir_visitor_status
355 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
356 {
357 ir_hierarchical_visitor::visit_enter(ir);
358
359 if (stack.back().state == UNKNOWN)
360 stack.back().state = handle_precision(ir->type, ir->precision());
361
362 return visit_continue;
363 }
364
365 ir_visitor_status
366 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
367 {
368 ir_hierarchical_visitor::visit_enter(ir);
369
370 if (stack.back().state == UNKNOWN)
371 stack.back().state = handle_precision(ir->type, ir->precision());
372
373 return visit_continue;
374 }
375
376 ir_visitor_status
377 find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
378 {
379 ir_hierarchical_visitor::visit_enter(ir);
380
381 /* The precision of the sample value depends on the precision of the
382 * sampler.
383 */
384 stack.back().state = handle_precision(ir->type,
385 ir->sampler->precision());
386 return visit_continue;
387 }
388
389 ir_visitor_status
390 find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
391 {
392 ir_hierarchical_visitor::visit_enter(ir);
393
394 if (!can_lower_type(options, ir->type))
395 stack.back().state = CANT_LOWER;
396
397 /* Don't lower precision for derivative calculations */
398 if (!options->LowerPrecisionDerivatives &&
399 (ir->operation == ir_unop_dFdx ||
400 ir->operation == ir_unop_dFdx_coarse ||
401 ir->operation == ir_unop_dFdx_fine ||
402 ir->operation == ir_unop_dFdy ||
403 ir->operation == ir_unop_dFdy_coarse ||
404 ir->operation == ir_unop_dFdy_fine)) {
405 stack.back().state = CANT_LOWER;
406 }
407
408 return visit_continue;
409 }
410
411 static unsigned
412 handle_call(ir_call *ir, const struct set *lowerable_rvalues)
413 {
414 /* The intrinsic call is inside the wrapper imageLoad function that will
415 * be inlined. We have to handle both of them.
416 */
417 if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
418 (ir->callee->is_builtin() &&
419 !strcmp(ir->callee_name(), "imageLoad"))) {
420 ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
421 ir_variable *resource = param->variable_referenced();
422
423 assert(ir->callee->return_precision == GLSL_PRECISION_HIGH);
424 assert(glsl_type_is_image(glsl_without_array(resource->type)));
425
426 /* GLSL ES 3.20 requires that images have a precision modifier, but if
427 * you set one, it doesn't do anything, because all intrinsics are
428 * defined with highp. This seems to be a spec bug.
429 *
430 * In theory we could set the return value to mediump if the image
431 * format has a lower precision. This appears to be the most sensible
432 * thing to do.
433 */
434 const struct util_format_description *desc =
435 util_format_description(resource->data.image_format);
436 int i =
437 util_format_get_first_non_void_channel(resource->data.image_format);
438 bool mediump;
439
440 assert(i >= 0);
441
442 if (desc->channel[i].pure_integer ||
443 desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
444 mediump = desc->channel[i].size <= 16;
445 else
446 mediump = desc->channel[i].size <= 10; /* unorm/snorm */
447
448 return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH;
449 }
450
451 /* Return the declared precision for user-defined functions. */
452 if (!ir->callee->is_builtin() || ir->callee->return_precision != GLSL_PRECISION_NONE)
453 return ir->callee->return_precision;
454
455 /* Handle special calls. */
456 if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
457 ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
458 ir_variable *var = param->variable_referenced();
459
460 /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
461 * be inlined by lower_precision() if we return a lowered precision here,
462 * so that we can get to ir_texture later and do proper lowering.
463 *
464 * We should lower the type of the return value if the sampler type
465 * uses lower precision. The function parameters don't matter.
466 */
467 if (var && glsl_type_is_sampler(glsl_without_array(var->type))) {
468 /* textureGatherOffsets always takes a highp array of constants. As
469 * per the discussion https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16547#note_1393704
470 * trying to lower the precision results in segfault later on
471 * in the compiler as textureGatherOffsets will end up being passed
472 * a temp when it's expecting a constant as required by the spec.
473 */
474 if (!strcmp(ir->callee_name(), "textureGatherOffsets"))
475 return GLSL_PRECISION_HIGH;
476
477 return var->data.precision;
478 }
479 }
480
481 if (ir->callee->return_precision != GLSL_PRECISION_NONE)
482 return ir->callee->return_precision;
483
484 if (/* Parameters are always implicitly promoted to highp: */
485 !strcmp(ir->callee_name(), "floatBitsToInt") ||
486 !strcmp(ir->callee_name(), "floatBitsToUint") ||
487 !strcmp(ir->callee_name(), "intBitsToFloat") ||
488 !strcmp(ir->callee_name(), "uintBitsToFloat"))
489 return GLSL_PRECISION_HIGH;
490
491 /* Number of parameters to check if they are lowerable. */
492 unsigned check_parameters = ir->actual_parameters.length();
493
494 /* "For the interpolateAt* functions, the call will return a precision
495 * qualification matching the precision of the interpolant argument to the
496 * function call."
497 *
498 * and
499 *
500 * "The precision qualification of the value returned from bitfieldExtract()
501 * matches the precision qualification of the call's input argument
502 * “value”."
503 */
504 if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
505 !strcmp(ir->callee_name(), "interpolateAtSample") ||
506 !strcmp(ir->callee_name(), "bitfieldExtract")) {
507 check_parameters = 1;
508 } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
509 /* "The precision qualification of the value returned from bitfieldInsert
510 * matches the highest precision qualification of the call's input
511 * arguments “base” and “insert”."
512 */
513 check_parameters = 2;
514 }
515
516 /* If the call is to a builtin, then the function won’t have a return
517 * precision and we should determine it from the precision of the arguments.
518 */
519 foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
520 if (!check_parameters)
521 break;
522
523 if (!param->as_constant() &&
524 _mesa_set_search(lowerable_rvalues, param) == NULL)
525 return GLSL_PRECISION_HIGH;
526
527 --check_parameters;
528 }
529
530 return GLSL_PRECISION_MEDIUM;
531 }
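/* Illustrative example of the inference above: for a builtin call such as
 * min(a, b) with no declared return precision, the result is
 * GLSL_PRECISION_MEDIUM only if every checked argument is either a constant
 * or was itself marked lowerable; otherwise it is GLSL_PRECISION_HIGH.
 */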
532
533 ir_visitor_status
534 find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
535 {
536 ir_hierarchical_visitor::visit_leave(ir);
537
538 /* Special case for handling temporary variables generated by the compiler
539 * for function calls. If we assign to one of these using a function call
540 * that has a lowerable return type then we can assume the temporary
541 * variable should have a medium precision too.
542 */
543
544 /* Do nothing if the return type is void. */
545 if (!ir->return_deref)
546 return visit_continue;
547
548 ir_variable *var = ir->return_deref->variable_referenced();
549
550 assert(var->data.mode == ir_var_temporary);
551
552 unsigned return_precision = handle_call(ir, lowerable_rvalues);
553
554 can_lower_state lower_state =
555 handle_precision(var->type, return_precision);
556
557 if (lower_state == SHOULD_LOWER) {
558 /* Function calls always write to a temporary return value in the caller,
559 * which has no other users. That temp may start with the precision of
560 * the function's signature, but if we're inferring the precision of an
561 * unqualified builtin operation (particularly the imageLoad overrides!)
562 * then we need to update it.
563 */
564 var->data.precision = GLSL_PRECISION_MEDIUM;
565 } else {
566 var->data.precision = GLSL_PRECISION_HIGH;
567 }
568
569 return visit_continue;
570 }
571
572 ir_visitor_status
573 find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
574 {
575 ir_hierarchical_visitor::visit_leave(ir);
576
577 /* Special case for handling temporary variables generated by the compiler.
578 * If we assign to one of these using a lowered precision then we can assume
579 * the temporary variable should have a medium precision too.
580 */
581 ir_variable *var = ir->lhs->variable_referenced();
582
583 if (var->data.mode == ir_var_temporary) {
584 if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
585 /* Only override the precision if this is the first assignment. For
586 * temporaries such as the ones generated for the ?: operator there
587 * can be multiple assignments with different precisions. This way we
588 * get the highest precision of all of the assignments.
589 */
590 if (var->data.precision == GLSL_PRECISION_NONE)
591 var->data.precision = GLSL_PRECISION_MEDIUM;
592 } else if (!ir->rhs->as_constant()) {
593 var->data.precision = GLSL_PRECISION_HIGH;
594 }
595 }
596
597 return visit_continue;
598 }
599
600 void
601 find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
602 exec_list *instructions,
603 struct set *result)
604 {
605 find_lowerable_rvalues_visitor v(result, options);
606
607 visit_list_elements(&v, instructions);
608
609 assert(v.stack.empty());
610 }
611
612 static const glsl_type *
613 convert_type(bool up, const glsl_type *type)
614 {
615 if (glsl_type_is_array(type)) {
616 return glsl_array_type(convert_type(up, type->fields.array),
617 glsl_array_size(type),
618 type->explicit_stride);
619 }
620
621 glsl_base_type new_base_type;
622
623 if (up) {
624 switch (type->base_type) {
625 case GLSL_TYPE_FLOAT16:
626 new_base_type = GLSL_TYPE_FLOAT;
627 break;
628 case GLSL_TYPE_INT16:
629 new_base_type = GLSL_TYPE_INT;
630 break;
631 case GLSL_TYPE_UINT16:
632 new_base_type = GLSL_TYPE_UINT;
633 break;
634 default:
635 unreachable("invalid type");
636 return NULL;
637 }
638 } else {
639 switch (type->base_type) {
640 case GLSL_TYPE_FLOAT:
641 new_base_type = GLSL_TYPE_FLOAT16;
642 break;
643 case GLSL_TYPE_INT:
644 new_base_type = GLSL_TYPE_INT16;
645 break;
646 case GLSL_TYPE_UINT:
647 new_base_type = GLSL_TYPE_UINT16;
648 break;
649 default:
650 unreachable("invalid type");
651 return NULL;
652 }
653 }
654
655 return glsl_simple_explicit_type(new_base_type,
656 type->vector_elements,
657 type->matrix_columns,
658 type->explicit_stride,
659 type->interface_row_major,
660 0 /* explicit_alignment */);
661 }
662
663 static const glsl_type *
664 lower_glsl_type(const glsl_type *type)
665 {
666 return convert_type(false, type);
667 }
668
669 static ir_rvalue *
670 convert_precision(bool up, ir_rvalue *ir)
671 {
672 unsigned op;
673
674 if (up) {
675 switch (ir->type->base_type) {
676 case GLSL_TYPE_FLOAT16:
677 op = ir_unop_f162f;
678 break;
679 case GLSL_TYPE_INT16:
680 op = ir_unop_i2i;
681 break;
682 case GLSL_TYPE_UINT16:
683 op = ir_unop_u2u;
684 break;
685 default:
686 unreachable("invalid type");
687 return NULL;
688 }
689 } else {
690 switch (ir->type->base_type) {
691 case GLSL_TYPE_FLOAT:
692 op = ir_unop_f2fmp;
693 break;
694 case GLSL_TYPE_INT:
695 op = ir_unop_i2imp;
696 break;
697 case GLSL_TYPE_UINT:
698 op = ir_unop_u2ump;
699 break;
700 default:
701 unreachable("invalid type");
702 return NULL;
703 }
704 }
705
706 const glsl_type *desired_type = convert_type(up, ir->type);
707 void *mem_ctx = ralloc_parent(ir);
708 return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
709 }
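/* In other words (illustrative): convert_precision(false, x) wraps a 32-bit
 * rvalue in f2fmp/i2imp/u2ump so that it becomes 16-bit, while
 * convert_precision(true, x) wraps a 16-bit rvalue in f162f/i2i/u2u to bring
 * it back to 32 bits. convert_type() computes the matching glsl_type.
 */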
710
711 void
712 lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
713 {
714 ir_rvalue *ir = *rvalue;
715
716 if (ir == NULL)
717 return;
718
719 if (ir->as_dereference()) {
720 if (!glsl_type_is_boolean(ir->type))
721 *rvalue = convert_precision(false, ir);
722 } else if (glsl_type_is_32bit(ir->type)) {
723 ir->type = lower_glsl_type(ir->type);
724
725 ir_constant *const_ir = ir->as_constant();
726
727 if (const_ir) {
728 ir_constant_data value;
729
730 if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
731 for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
732 value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
733 } else if (ir->type->base_type == GLSL_TYPE_INT16) {
734 for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
735 value.i16[i] = const_ir->value.i[i];
736 } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
737 for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
738 value.u16[i] = const_ir->value.u[i];
739 } else {
740 unreachable("invalid type");
741 }
742
743 const_ir->value = value;
744 }
745 }
746 }
747
748 ir_visitor_status
749 lower_precision_visitor::visit_enter(ir_dereference_record *ir)
750 {
751 /* We don’t want to lower the variable */
752 return visit_continue_with_parent;
753 }
754
755 ir_visitor_status
756 lower_precision_visitor::visit_enter(ir_dereference_array *ir)
757 {
758 /* We don’t want to convert the array index or the variable. If the array
759 * index itself is lowerable that will be handled separately.
760 */
761 return visit_continue_with_parent;
762 }
763
764 ir_visitor_status
765 lower_precision_visitor::visit_enter(ir_call *ir)
766 {
767 /* We don’t want to convert the arguments. These will be handled separately.
768 */
769 return visit_continue_with_parent;
770 }
771
772 ir_visitor_status
773 lower_precision_visitor::visit_enter(ir_texture *ir)
774 {
775 /* We don’t want to convert the arguments. These will be handled separately.
776 */
777 return visit_continue_with_parent;
778 }
779
780 ir_visitor_status
781 lower_precision_visitor::visit_leave(ir_expression *ir)
782 {
783 ir_rvalue_visitor::visit_leave(ir);
784
785 /* If the expression is a conversion operation to or from bool then fix the
786 * operation.
787 */
788 switch (ir->operation) {
789 case ir_unop_b2f:
790 ir->operation = ir_unop_b2f16;
791 break;
792 case ir_unop_f2b:
793 ir->operation = ir_unop_f162b;
794 break;
795 case ir_unop_b2i:
796 case ir_unop_i2b:
797 /* Nothing to do - they both support int16. */
798 break;
799 default:
800 break;
801 }
802
803 return visit_continue;
804 }
805
806 void
807 find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
808 {
809 /* find_lowerable_rvalues_visitor has already determined which rvalues
810 * can have their precision lowered and recorded them in lowerable_rvalues.
811 * For each such rvalue we can now add the conversions (f2fmp, etc.)
812 * through lower_precision_visitor.
813 */
814 if (*rvalue == NULL)
815 return;
816
817 struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);
818
819 if (!entry)
820 return;
821
822 _mesa_set_remove(lowerable_rvalues, entry);
823
824 /* If the entire expression is just a variable dereference then trying to
825 * lower it will just directly add pointless to and from conversions without
826 * any actual operation in-between. Although these will eventually get
827 * optimised out, avoiding generating them here also avoids breaking inout
828 * parameters to functions.
829 */
830 if ((*rvalue)->as_dereference())
831 return;
832
833 lower_precision_visitor v;
834
835 (*rvalue)->accept(&v);
836 v.handle_rvalue(rvalue);
837
838 /* We don’t need to add the final conversion if the final type has been
839 * converted to bool
840 */
841 if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
842 *rvalue = convert_precision(true, *rvalue);
843 }
844 }
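/* Putting the two visitors together (illustrative): for mediump floats a and
 * b, a lowerable rvalue "a + b" becomes "f162f(f2fmp(a) + f2fmp(b))" -- the
 * operands are converted down, the addition is done in float16, and the
 * result is converted back up so the surrounding 32-bit IR still type-checks.
 */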
845
846 ir_visitor_status
847 find_precision_visitor::visit_enter(ir_call *ir)
848 {
849 ir_rvalue_enter_visitor::visit_enter(ir);
850
851 ir_variable *return_var =
852 ir->return_deref ? ir->return_deref->variable_referenced() : NULL;
853
854 /* Don't do anything for image_load here. We have only changed the return
855 * value to mediump/lowp, so that following instructions can use reduced
856 * precision.
857 *
858 * The return value type of the intrinsic itself isn't changed here, but
859 * can be changed in NIR if all users use the *2*mp opcode.
860 */
861 if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
862 return visit_continue;
863
864 /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
865 * overrode the precision of the temporary return variable, then we can
866 * replace the builtin implementation with a lowered version.
867 */
868
869 if (!ir->callee->is_builtin() ||
870 ir->callee->is_intrinsic() ||
871 return_var == NULL ||
872 (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
873 return_var->data.precision != GLSL_PRECISION_LOW))
874 return visit_continue;
875
876 ir->callee = map_builtin(ir->callee);
877 ir->generate_inline(ir);
878 ir->remove();
879
880 return visit_continue_with_parent;
881 }
882
883 ir_function_signature *
884 find_precision_visitor::map_builtin(ir_function_signature *sig)
885 {
886 if (lowered_builtins == NULL) {
887 lowered_builtins = _mesa_pointer_hash_table_create(NULL);
888 clone_ht = _mesa_pointer_hash_table_create(NULL);
889 lowered_builtin_mem_ctx = ralloc_context(NULL);
890 } else {
891 struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
892 if (entry)
893 return (ir_function_signature *) entry->data;
894 }
895
896 ir_function_signature *lowered_sig =
897 sig->clone(lowered_builtin_mem_ctx, clone_ht);
898
899 /* If we're lowering the output precision of the function, then also lower
900 * the precision of its inputs unless they have a specific qualifier. The
901 * exception is bitCount, which doesn't declare its arguments highp but
902 * should not be lowering the args to mediump just because the output is
903 * lowp.
904 */
905 if (strcmp(sig->function_name(), "bitCount") != 0) {
906 foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
907 /* Demote the precision of unqualified function arguments. */
908 if (param->data.precision == GLSL_PRECISION_NONE)
909 param->data.precision = GLSL_PRECISION_MEDIUM;
910 }
911 }
912
913 lower_precision(options, &lowered_sig->body);
914
915 _mesa_hash_table_clear(clone_ht, NULL);
916
917 _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);
918
919 return lowered_sig;
920 }
921
922 find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
923 : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
924 lowered_builtins(NULL),
925 clone_ht(NULL),
926 lowered_builtin_mem_ctx(NULL),
927 options(options)
928 {
929 }
930
931 find_precision_visitor::~find_precision_visitor()
932 {
933 _mesa_set_destroy(lowerable_rvalues, NULL);
934
935 if (lowered_builtins) {
936 _mesa_hash_table_destroy(lowered_builtins, NULL);
937 _mesa_hash_table_destroy(clone_ht, NULL);
938 ralloc_free(lowered_builtin_mem_ctx);
939 }
940 }
941
942 /* Lowering opcodes to 16 bits is not enough for programs with control flow
943 * (and the ?: operator, which is represented by if-then-else in the IR),
944 * because temporary variables, which are used for passing values between
945 * code blocks, are not lowered, resulting in 32-bit phis in NIR.
946 *
947 * First change the variable types to 16 bits, then change all ir_dereference
948 * types to 16 bits.
949 */
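/* Illustrative example: in
 *
 *    mediump float r = cond ? a : b;
 *
 * the ?: operator is represented as an if-then-else writing a
 * compiler-generated temporary. Unless that temporary is lowered to float16
 * here as well, both 16-bit assignments get converted back to 32 bits and
 * NIR ends up with a 32-bit phi for r.
 */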
950 class lower_variables_visitor : public ir_rvalue_enter_visitor {
951 public:
952 lower_variables_visitor(const struct gl_shader_compiler_options *options)
953 : options(options) {
954 lower_vars = _mesa_pointer_set_create(NULL);
955 }
956
957 virtual ~lower_variables_visitor()
958 {
959 _mesa_set_destroy(lower_vars, NULL);
960 }
961
962 virtual ir_visitor_status visit(ir_variable *var);
963 virtual ir_visitor_status visit_enter(ir_assignment *ir);
964 virtual ir_visitor_status visit_enter(ir_return *ir);
965 virtual ir_visitor_status visit_enter(ir_call *ir);
966 virtual void handle_rvalue(ir_rvalue **rvalue);
967
968 void fix_types_in_deref_chain(ir_dereference *ir);
969 void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
970 bool insert_before);
971
972 const struct gl_shader_compiler_options *options;
973 set *lower_vars;
974 };
975
976 static void
977 lower_constant(ir_constant *ir)
978 {
979 if (glsl_type_is_array(ir->type)) {
980 for (int i = 0; i < glsl_array_size(ir->type); i++)
981 lower_constant(ir->get_array_element(i));
982
983 ir->type = lower_glsl_type(ir->type);
984 return;
985 }
986
987 ir->type = lower_glsl_type(ir->type);
988 ir_constant_data value;
989
990 if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
991 for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
992 value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
993 } else if (ir->type->base_type == GLSL_TYPE_INT16) {
994 for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
995 value.i16[i] = ir->value.i[i];
996 } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
997 for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
998 value.u16[i] = ir->value.u[i];
999 } else {
1000 unreachable("invalid type");
1001 }
1002
1003 ir->value = value;
1004 }
1005
1006 ir_visitor_status
1007 lower_variables_visitor::visit(ir_variable *var)
1008 {
1009 if ((var->data.mode != ir_var_temporary &&
1010 var->data.mode != ir_var_auto &&
1011 /* Lower uniforms but not UBOs. */
1012 (var->data.mode != ir_var_uniform ||
1013 var->is_in_buffer_block() ||
1014 !(options->LowerPrecisionFloat16Uniforms &&
1015 glsl_without_array(var->type)->base_type == GLSL_TYPE_FLOAT))) ||
1016 !glsl_type_is_32bit(glsl_without_array(var->type)) ||
1017 (var->data.precision != GLSL_PRECISION_MEDIUM &&
1018 var->data.precision != GLSL_PRECISION_LOW) ||
1019 !can_lower_type(options, var->type))
1020 return visit_continue;
1021
1022 /* Lower constant initializers. */
1023 if (var->constant_value &&
1024 var->type == var->constant_value->type) {
1025 if (!options->LowerPrecisionConstants)
1026 return visit_continue;
1027 var->constant_value =
1028 var->constant_value->clone(ralloc_parent(var), NULL);
1029 lower_constant(var->constant_value);
1030 }
1031
1032 if (var->constant_initializer &&
1033 var->type == var->constant_initializer->type) {
1034 if (!options->LowerPrecisionConstants)
1035 return visit_continue;
1036 var->constant_initializer =
1037 var->constant_initializer->clone(ralloc_parent(var), NULL);
1038 lower_constant(var->constant_initializer);
1039 }
1040
1041 var->type = lower_glsl_type(var->type);
1042 _mesa_set_add(lower_vars, var);
1043
1044 return visit_continue;
1045 }
1046
1047 void
1048 lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
1049 {
1050 assert(glsl_type_is_32bit(glsl_without_array(ir->type)));
1051 assert(_mesa_set_search(lower_vars, ir->variable_referenced()));
1052
1053 /* Fix the type in the dereference node. */
1054 ir->type = lower_glsl_type(ir->type);
1055
1056 /* If it's an array, fix the types in the whole dereference chain. */
1057 for (ir_dereference_array *deref_array = ir->as_dereference_array();
1058 deref_array;
1059 deref_array = deref_array->array->as_dereference_array()) {
1060 assert(glsl_type_is_32bit(glsl_without_array(deref_array->array->type)));
1061 deref_array->array->type = lower_glsl_type(deref_array->array->type);
1062 }
1063 }
1064
1065 void
1066 lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
1067 ir_rvalue *rhs,
1068 bool insert_before)
1069 {
1070 void *mem_ctx = ralloc_parent(lhs);
1071
1072 if (glsl_type_is_array(lhs->type)) {
1073 for (unsigned i = 0; i < lhs->type->length; i++) {
1074 ir_dereference *l, *r;
1075
1076 l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
1077 new(mem_ctx) ir_constant(i));
1078 r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
1079 new(mem_ctx) ir_constant(i));
1080 convert_split_assignment(l, r, insert_before);
1081 }
1082 return;
1083 }
1084
1085 assert(glsl_type_is_16bit(lhs->type) || glsl_type_is_32bit(lhs->type));
1086 assert(glsl_type_is_16bit(rhs->type) || glsl_type_is_32bit(rhs->type));
1087 assert(glsl_type_is_16bit(lhs->type) != glsl_type_is_16bit(rhs->type));
1088
1089 ir_assignment *assign =
1090 new(mem_ctx) ir_assignment(lhs, convert_precision(glsl_type_is_32bit(lhs->type), rhs));
1091
1092 if (insert_before)
1093 base_ir->insert_before(assign);
1094 else
1095 base_ir->insert_after(assign);
1096 }
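/* Example of the splitting above (illustrative): assigning a lowered
 * float16_t[4] temporary to a highp float[4] variable is rewritten as four
 * element-wise assignments, each one wrapping the source element in the
 * appropriate up-conversion (f162f here).
 */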
1097
1098 ir_visitor_status
1099 lower_variables_visitor::visit_enter(ir_assignment *ir)
1100 {
1101 ir_dereference *lhs = ir->lhs;
1102 ir_variable *var = lhs->variable_referenced();
1103 ir_dereference *rhs_deref = ir->rhs->as_dereference();
1104 ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
1105 ir_constant *rhs_const = ir->rhs->as_constant();
1106
1107 /* Legalize array assignments between lowered and non-lowered variables. */
1108 if (glsl_type_is_array(lhs->type) &&
1109 (rhs_var || rhs_const) &&
1110 (!rhs_var ||
1111 (var &&
1112 glsl_type_is_16bit(glsl_without_array(var->type)) !=
1113 glsl_type_is_16bit(glsl_without_array(rhs_var->type)))) &&
1114 (!rhs_const ||
1115 (var &&
1116 glsl_type_is_16bit(glsl_without_array(var->type)) &&
1117 glsl_type_is_32bit(glsl_without_array(rhs_const->type))))) {
1118 assert(glsl_type_is_array(ir->rhs->type));
1119
1120 /* Fix array assignments from lowered to non-lowered. */
1121 if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
1122 fix_types_in_deref_chain(rhs_deref);
1123 /* Convert to 32 bits for LHS. */
1124 convert_split_assignment(lhs, rhs_deref, true);
1125 ir->remove();
1126 return visit_continue;
1127 }
1128
1129 /* Fix array assignments from non-lowered to lowered. */
1130 if (var &&
1131 _mesa_set_search(lower_vars, var) &&
1132 glsl_type_is_32bit(glsl_without_array(ir->rhs->type))) {
1133 fix_types_in_deref_chain(lhs);
1134 /* Convert to 16 bits for LHS. */
1135 convert_split_assignment(lhs, ir->rhs, true);
1136 ir->remove();
1137 return visit_continue;
1138 }
1139 }
1140
1141 /* Fix assignment types. */
1142 if (var &&
1143 _mesa_set_search(lower_vars, var)) {
1144 /* Fix the LHS type. */
1145 if (glsl_type_is_32bit(glsl_without_array(lhs->type)))
1146 fix_types_in_deref_chain(lhs);
1147
1148 /* Fix the RHS type if it's a lowered variable. */
1149 if (rhs_var &&
1150 _mesa_set_search(lower_vars, rhs_var) &&
1151 glsl_type_is_32bit(glsl_without_array(rhs_deref->type)))
1152 fix_types_in_deref_chain(rhs_deref);
1153
1154 /* Fix the RHS type if it's a non-array expression. */
1155 if (glsl_type_is_32bit(ir->rhs->type)) {
1156 ir_expression *expr = ir->rhs->as_expression();
1157
1158 /* Convert the RHS to the LHS type. */
1159 if (expr &&
1160 (expr->operation == ir_unop_f162f ||
1161 expr->operation == ir_unop_i2i ||
1162 expr->operation == ir_unop_u2u) &&
1163 glsl_type_is_16bit(expr->operands[0]->type)) {
1164 /* If there is an "up" conversion, just remove it.
1165 * This is optional. We could as well execute the else statement and
1166 * let NIR eliminate the up+down conversions.
1167 */
1168 ir->rhs = expr->operands[0];
1169 } else {
1170 /* Add a "down" conversion operation to fix the type of RHS. */
1171 ir->rhs = convert_precision(false, ir->rhs);
1172 }
1173 }
1174 }
1175
1176 return ir_rvalue_enter_visitor::visit_enter(ir);
1177 }
1178
1179 ir_visitor_status
1180 lower_variables_visitor::visit_enter(ir_return *ir)
1181 {
1182 void *mem_ctx = ralloc_parent(ir);
1183
1184 ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL;
1185 if (deref) {
1186 ir_variable *var = deref->variable_referenced();
1187
1188 /* Fix the type of the return value. */
1189 if (var &&
1190 _mesa_set_search(lower_vars, var) &&
1191 glsl_type_is_32bit(glsl_without_array(deref->type))) {
1192 /* Create a 32-bit temporary variable. */
1193 ir_variable *new_var =
1194 new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
1195 base_ir->insert_before(new_var);
1196
1197 /* Fix types in dereferences. */
1198 fix_types_in_deref_chain(deref);
1199
1200 /* Convert to 32 bits for the return value. */
1201 convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
1202 deref, true);
1203 ir->value = new(mem_ctx) ir_dereference_variable(new_var);
1204 }
1205 }
1206
1207 return ir_rvalue_enter_visitor::visit_enter(ir);
1208 }
1209
1210 void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
1211 {
1212 ir_rvalue *ir = *rvalue;
1213
1214 if (in_assignee || ir == NULL)
1215 return;
1216
1217 ir_expression *expr = ir->as_expression();
1218 ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL;
1219
1220 /* Remove f2fmp(float16). Same for int16 and uint16. */
1221 if (expr &&
1222 expr_op0_deref &&
1223 (expr->operation == ir_unop_f2fmp ||
1224 expr->operation == ir_unop_i2imp ||
1225 expr->operation == ir_unop_u2ump ||
1226 expr->operation == ir_unop_f2f16 ||
1227 expr->operation == ir_unop_i2i ||
1228 expr->operation == ir_unop_u2u) &&
1229 glsl_type_is_16bit(glsl_without_array(expr->type)) &&
1230 glsl_type_is_32bit(glsl_without_array(expr_op0_deref->type)) &&
1231 expr_op0_deref->variable_referenced() &&
1232 _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) {
1233 fix_types_in_deref_chain(expr_op0_deref);
1234
1235 /* Remove f2fmp/i2imp/u2ump. */
1236 *rvalue = expr_op0_deref;
1237 return;
1238 }
1239
1240 ir_dereference *deref = ir->as_dereference();
1241
1242 if (deref) {
1243 ir_variable *var = deref->variable_referenced();
1244
1245 /* var can be NULL if we are dereferencing ir_constant. */
1246 if (var &&
1247 _mesa_set_search(lower_vars, var) &&
1248 glsl_type_is_32bit(glsl_without_array(deref->type))) {
1249 void *mem_ctx = ralloc_parent(ir);
1250
1251 /* Create a 32-bit temporary variable. */
1252 ir_variable *new_var =
1253 new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
1254 base_ir->insert_before(new_var);
1255
1256 /* Fix types in dereferences. */
1257 fix_types_in_deref_chain(deref);
1258
1259 /* Convert to 32 bits for the rvalue. */
1260 convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
1261 deref, true);
1262 *rvalue = new(mem_ctx) ir_dereference_variable(new_var);
1263 }
1264 }
1265 }
1266
1267 ir_visitor_status
1268 lower_variables_visitor::visit_enter(ir_call *ir)
1269 {
1270 void *mem_ctx = ralloc_parent(ir);
1271
1272 /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
1273 foreach_two_lists(formal_node, &ir->callee->parameters,
1274 actual_node, &ir->actual_parameters) {
1275 ir_dereference *param_deref =
1276 ((ir_rvalue *)actual_node)->as_dereference();
1277 ir_variable *param = (ir_variable *)formal_node;
1278
1279 if (!param_deref)
1280 continue;
1281
1282 ir_variable *var = param_deref->variable_referenced();
1283
1284 /* var can be NULL if we are dereferencing ir_constant. */
1285 if (var &&
1286 _mesa_set_search(lower_vars, var) &&
1287 glsl_type_is_32bit(glsl_without_array(param->type))) {
1288 fix_types_in_deref_chain(param_deref);
1289
1290 /* Create a 32-bit temporary variable for the parameter. */
1291 ir_variable *new_var =
1292 new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary);
1293 base_ir->insert_before(new_var);
1294
1295 /* Replace the parameter. */
1296 actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var));
1297
1298 if (param->data.mode == ir_var_function_in ||
1299 param->data.mode == ir_var_function_inout) {
1300 /* Convert to 32 bits for passing in. */
1301 convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
1302 param_deref->clone(mem_ctx, NULL), true);
1303 }
1304 if (param->data.mode == ir_var_function_out ||
1305 param->data.mode == ir_var_function_inout) {
1306 /* Convert to 16 bits after returning. */
1307 convert_split_assignment(param_deref,
1308 new(mem_ctx) ir_dereference_variable(new_var),
1309 false);
1310 }
1311 }
1312 }
1313
1314 /* Fix the type of return value dereferences. */
1315 ir_dereference_variable *ret_deref = ir->return_deref;
1316 ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL;
1317
1318 if (ret_var &&
1319 _mesa_set_search(lower_vars, ret_var) &&
1320 glsl_type_is_32bit(glsl_without_array(ret_deref->type))) {
1321 /* Create a 32-bit temporary variable. */
1322 ir_variable *new_var =
1323 new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp",
1324 ir_var_temporary);
1325 base_ir->insert_before(new_var);
1326
1327 /* Replace the return variable. */
1328 ret_deref->var = new_var;
1329
1330 /* Convert to 16 bits after returning. */
1331 convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var),
1332 new(mem_ctx) ir_dereference_variable(new_var),
1333 false);
1334 }
1335
1336 return ir_rvalue_enter_visitor::visit_enter(ir);
1337 }
1338
1339 }
1340
1341 void
1342 lower_precision(const struct gl_shader_compiler_options *options,
1343 exec_list *instructions)
1344 {
1345 find_precision_visitor v(options);
1346 find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
1347 visit_list_elements(&v, instructions);
1348
1349 lower_variables_visitor vars(options);
1350 visit_list_elements(&vars, instructions);
1351 }
1352