• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Google, Inc
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file lower_precision.cpp
26  */
27 
28 #include "main/macros.h"
29 #include "main/consts_exts.h"
30 #include "compiler/glsl_types.h"
31 #include "ir.h"
32 #include "ir_builder.h"
33 #include "ir_optimization.h"
34 #include "ir_rvalue_visitor.h"
35 #include "util/half_float.h"
36 #include "util/set.h"
37 #include "util/hash_table.h"
38 #include <vector>
39 
40 namespace {
41 
/**
 * Second stage of the pass: walks the IR and rewrites the rvalues that were
 * recorded as lowerable roots in \c lowerable_rvalues, and redirects calls to
 * builtins whose return temporary was marked mediump/lowp.
 */
class find_precision_visitor : public ir_rvalue_enter_visitor {
public:
   find_precision_visitor(const struct gl_shader_compiler_options *options);
   ~find_precision_visitor();

   /* If *rvalue is in lowerable_rvalues it is removed from the set and,
    * unless it is a plain dereference, rewritten with
    * lower_precision_visitor and wrapped in a conversion back to the
    * original width (no conversion is added for bool results).
    */
   virtual void handle_rvalue(ir_rvalue **rvalue);
   /* For calls to non-intrinsic builtins whose return temporary has medium
    * or low precision, replaces the callee with map_builtin(callee).
    */
   virtual ir_visitor_status visit_enter(ir_call *ir);

   /* NOTE(review): defined outside this view; presumably returns the lowered
    * counterpart of \c sig, creating it on first use - confirm.
    */
   ir_function_signature *map_builtin(ir_function_signature *sig);

   /* Set of rvalues that can be lowered. This will be filled in by
    * find_lowerable_rvalues_visitor. Only the root node of a lowerable section
    * will be added to this set.
    */
   struct set *lowerable_rvalues;

   /**
    * A mapping of builtin signature functions to lowered versions. This is
    * filled in lazily when a lowered version is needed.
    */
   struct hash_table *lowered_builtins;
   /**
    * A temporary hash table only used in order to clone functions.
    */
   struct hash_table *clone_ht;

   /* NOTE(review): presumably the allocation context that owns the cloned
    * lowered builtins - confirm against the constructor/destructor.
    */
   void *lowered_builtin_mem_ctx;

   /* Compiler options supplied at construction. */
   const struct gl_shader_compiler_options *options;
};
72 
/**
 * First stage of the pass: walks the IR keeping a stack with one entry per
 * instruction currently being visited, classifies every node as lowerable or
 * not, and records the topmost lowerable rvalues in \c lowerable_rvalues.
 */
class find_lowerable_rvalues_visitor : public ir_hierarchical_visitor {
public:
   enum can_lower_state {
      /* Nothing seen so far decides either way (e.g. no precision
       * qualifier).
       */
      UNKNOWN,
      /* The node has a type that cannot be lowered or is highp. */
      CANT_LOWER,
      /* The node is mediump/lowp and has a lowerable type. */
      SHOULD_LOWER,
   };

   enum parent_relation {
      /* The parent performs a further operation involving the result from the
       * child and can be lowered along with it.
       */
      COMBINED_OPERATION,
      /* The parent instruction's operation is independent of the child type so
       * the child should be lowered separately.
       */
      INDEPENDENT_OPERATION,
   };

   struct stack_entry {
      /* Instruction this entry describes. */
      ir_instruction *instr;
      /* Current classification of the instruction. */
      enum can_lower_state state;
      /* List of child rvalues that can be lowered. When this stack entry is
       * popped, if this node itself can't be lowered then all of the children
       * are root nodes to lower so we will add them to lowerable_rvalues.
       * Otherwise if this node can also be lowered then we won't add the
       * children because we only want to add the topmost lowerable nodes to
       * lowerable_rvalues and the children will be lowered as part of lowering
       * this node.
       */
      std::vector<ir_instruction *> lowerable_children;
   };

   find_lowerable_rvalues_visitor(struct set *result,
                                  const struct gl_shader_compiler_options *options);

   /* Enter/leave callbacks installed on the visitor by the constructor;
    * \c data is the visitor itself. They push and pop \c stack.
    */
   static void stack_enter(class ir_instruction *ir, void *data);
   static void stack_leave(class ir_instruction *ir, void *data);

   /* Leaf nodes: these push and pop their own stack entry. */
   virtual ir_visitor_status visit(ir_constant *ir);
   virtual ir_visitor_status visit(ir_dereference_variable *ir);

   /* Interior nodes: set the state of the entry on top of the stack. */
   virtual ir_visitor_status visit_enter(ir_dereference_record *ir);
   virtual ir_visitor_status visit_enter(ir_dereference_array *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   virtual ir_visitor_status visit_enter(ir_expression *ir);

   /* Propagate the computed precision onto compiler temporaries. */
   virtual ir_visitor_status visit_leave(ir_assignment *ir);
   virtual ir_visitor_status visit_leave(ir_call *ir);

   /* Maps a (type, GLSL_PRECISION_*) pair to a can_lower_state. */
   can_lower_state handle_precision(const glsl_type *type,
                                    int precision) const;

   /* Says whether \c parent is lowered together with its children
    * (COMBINED_OPERATION) or separately from them (INDEPENDENT_OPERATION).
    */
   static parent_relation get_parent_relation(ir_instruction *parent,
                                              ir_instruction *child);

   /* One entry per instruction currently being visited; back() is the
    * innermost one.
    */
   std::vector<stack_entry> stack;
   /* Output set receiving the root lowerable rvalues. */
   struct set *lowerable_rvalues;
   /* Compiler options supplied at construction. */
   const struct gl_shader_compiler_options *options;

   /* Merges the top entry into its parent, records roots, then pops it. */
   void pop_stack_entry();
   /* Adds every pending lowerable child of \c entry to lowerable_rvalues. */
   void add_lowerable_children(const stack_entry &entry);
};
136 
/**
 * Rewrites one lowerable expression tree in place: dereferences get a
 * down-conversion, other 32-bit rvalues have their type (and constant data)
 * switched to the 16-bit equivalent. Dereference, call and texture nodes are
 * not descended into.
 */
class lower_precision_visitor : public ir_rvalue_visitor {
public:
   /* Converts or retypes a single rvalue; NULL is ignored. */
   virtual void handle_rvalue(ir_rvalue **rvalue);
   /* The visit_enter overloads all return visit_continue_with_parent. */
   virtual ir_visitor_status visit_enter(ir_dereference_array *);
   virtual ir_visitor_status visit_enter(ir_dereference_record *);
   virtual ir_visitor_status visit_enter(ir_call *ir);
   virtual ir_visitor_status visit_enter(ir_texture *ir);
   /* Swaps float<->bool conversion opcodes for their 16-bit forms. */
   virtual ir_visitor_status visit_leave(ir_expression *);
};
146 
147 static bool
can_lower_type(const struct gl_shader_compiler_options * options,const glsl_type * type)148 can_lower_type(const struct gl_shader_compiler_options *options,
149                const glsl_type *type)
150 {
151    /* Don’t lower any expressions involving non-float types except bool and
152     * texture samplers. This will rule out operations that change the type such
153     * as conversion to ints. Instead it will end up lowering the arguments
154     * instead and adding a final conversion to float32. We want to handle
155     * boolean types so that it will do comparisons as 16-bit.
156     */
157 
158    switch (type->without_array()->base_type) {
159    /* TODO: should we do anything for these two with regard to Int16 vs FP16
160     * support?
161     */
162    case GLSL_TYPE_BOOL:
163    case GLSL_TYPE_SAMPLER:
164    case GLSL_TYPE_IMAGE:
165       return true;
166 
167    case GLSL_TYPE_FLOAT:
168       return options->LowerPrecisionFloat16;
169 
170    case GLSL_TYPE_UINT:
171    case GLSL_TYPE_INT:
172       return options->LowerPrecisionInt16;
173 
174    default:
175       return false;
176    }
177 }
178 
find_lowerable_rvalues_visitor(struct set * res,const struct gl_shader_compiler_options * opts)179 find_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res,
180                                  const struct gl_shader_compiler_options *opts)
181 {
182    lowerable_rvalues = res;
183    options = opts;
184    callback_enter = stack_enter;
185    callback_leave = stack_leave;
186    data_enter = this;
187    data_leave = this;
188 }
189 
190 void
stack_enter(class ir_instruction * ir,void * data)191 find_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir,
192                                             void *data)
193 {
194    find_lowerable_rvalues_visitor *state =
195       (find_lowerable_rvalues_visitor *) data;
196 
197    /* Add a new stack entry for this instruction */
198    stack_entry entry;
199 
200    entry.instr = ir;
201    entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN;
202 
203    state->stack.push_back(entry);
204 }
205 
206 void
add_lowerable_children(const stack_entry & entry)207 find_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry)
208 {
209    /* We can’t lower this node so if there were any pending children then they
210     * are all root lowerable nodes and we should add them to the set.
211     */
212    for (auto &it : entry.lowerable_children)
213       _mesa_set_add(lowerable_rvalues, it);
214 }
215 
216 void
pop_stack_entry()217 find_lowerable_rvalues_visitor::pop_stack_entry()
218 {
219    const stack_entry &entry = stack.back();
220 
221    if (stack.size() >= 2) {
222       /* Combine this state into the parent state, unless the parent operation
223        * doesn’t have any relation to the child operations
224        */
225       stack_entry &parent = stack.end()[-2];
226       parent_relation rel = get_parent_relation(parent.instr, entry.instr);
227 
228       if (rel == COMBINED_OPERATION) {
229          switch (entry.state) {
230          case CANT_LOWER:
231             parent.state = CANT_LOWER;
232             break;
233          case SHOULD_LOWER:
234             if (parent.state == UNKNOWN)
235                parent.state = SHOULD_LOWER;
236             break;
237          case UNKNOWN:
238             break;
239          }
240       }
241    }
242 
243    if (entry.state == SHOULD_LOWER) {
244       ir_rvalue *rv = entry.instr->as_rvalue();
245 
246       if (rv == NULL) {
247          add_lowerable_children(entry);
248       } else if (stack.size() >= 2) {
249          stack_entry &parent = stack.end()[-2];
250 
251          switch (get_parent_relation(parent.instr, rv)) {
252          case COMBINED_OPERATION:
253             /* We only want to add the toplevel lowerable instructions to the
254              * lowerable set. Therefore if there is a parent then instead of
255              * adding this instruction to the set we will queue depending on
256              * the result of the parent instruction.
257              */
258             parent.lowerable_children.push_back(entry.instr);
259             break;
260          case INDEPENDENT_OPERATION:
261             _mesa_set_add(lowerable_rvalues, rv);
262             break;
263          }
264       } else {
265          /* This is a toplevel node so add it directly to the lowerable
266           * set.
267           */
268          _mesa_set_add(lowerable_rvalues, rv);
269       }
270    } else if (entry.state == CANT_LOWER) {
271       add_lowerable_children(entry);
272    }
273 
274    stack.pop_back();
275 }
276 
277 void
stack_leave(class ir_instruction * ir,void * data)278 find_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir,
279                                             void *data)
280 {
281    find_lowerable_rvalues_visitor *state =
282       (find_lowerable_rvalues_visitor *) data;
283 
284    state->pop_stack_entry();
285 }
286 
287 enum find_lowerable_rvalues_visitor::can_lower_state
handle_precision(const glsl_type * type,int precision) const288 find_lowerable_rvalues_visitor::handle_precision(const glsl_type *type,
289                                                  int precision) const
290 {
291    if (!can_lower_type(options, type))
292       return CANT_LOWER;
293 
294    switch (precision) {
295    case GLSL_PRECISION_NONE:
296       return UNKNOWN;
297    case GLSL_PRECISION_HIGH:
298       return CANT_LOWER;
299    case GLSL_PRECISION_MEDIUM:
300    case GLSL_PRECISION_LOW:
301       return SHOULD_LOWER;
302    }
303 
304    return CANT_LOWER;
305 }
306 
307 enum find_lowerable_rvalues_visitor::parent_relation
get_parent_relation(ir_instruction * parent,ir_instruction * child)308 find_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent,
309                                                     ir_instruction *child)
310 {
311    /* If the parent is a dereference instruction then the only child could be
312     * for example an array dereference and that should be lowered independently
313     * of the parent.
314     */
315    if (parent->as_dereference())
316       return INDEPENDENT_OPERATION;
317 
318    /* The precision of texture sampling depend on the precision of the sampler.
319     * The rest of the arguments don’t matter so we can treat it as an
320     * independent operation.
321     */
322    if (parent->as_texture())
323       return INDEPENDENT_OPERATION;
324 
325    return COMBINED_OPERATION;
326 }
327 
328 ir_visitor_status
visit(ir_constant * ir)329 find_lowerable_rvalues_visitor::visit(ir_constant *ir)
330 {
331    stack_enter(ir, this);
332 
333    if (!can_lower_type(options, ir->type))
334       stack.back().state = CANT_LOWER;
335 
336    stack_leave(ir, this);
337 
338    return visit_continue;
339 }
340 
341 ir_visitor_status
visit(ir_dereference_variable * ir)342 find_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir)
343 {
344    stack_enter(ir, this);
345 
346    if (stack.back().state == UNKNOWN)
347       stack.back().state = handle_precision(ir->type, ir->precision());
348 
349    stack_leave(ir, this);
350 
351    return visit_continue;
352 }
353 
354 ir_visitor_status
visit_enter(ir_dereference_record * ir)355 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir)
356 {
357    ir_hierarchical_visitor::visit_enter(ir);
358 
359    if (stack.back().state == UNKNOWN)
360       stack.back().state = handle_precision(ir->type, ir->precision());
361 
362    return visit_continue;
363 }
364 
365 ir_visitor_status
visit_enter(ir_dereference_array * ir)366 find_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir)
367 {
368    ir_hierarchical_visitor::visit_enter(ir);
369 
370    if (stack.back().state == UNKNOWN)
371       stack.back().state = handle_precision(ir->type, ir->precision());
372 
373    return visit_continue;
374 }
375 
376 ir_visitor_status
visit_enter(ir_texture * ir)377 find_lowerable_rvalues_visitor::visit_enter(ir_texture *ir)
378 {
379    ir_hierarchical_visitor::visit_enter(ir);
380 
381    /* The precision of the sample value depends on the precision of the
382     * sampler.
383     */
384    stack.back().state = handle_precision(ir->type,
385                                          ir->sampler->precision());
386    return visit_continue;
387 }
388 
389 ir_visitor_status
visit_enter(ir_expression * ir)390 find_lowerable_rvalues_visitor::visit_enter(ir_expression *ir)
391 {
392    ir_hierarchical_visitor::visit_enter(ir);
393 
394    if (!can_lower_type(options, ir->type))
395       stack.back().state = CANT_LOWER;
396 
397    /* Don't lower precision for derivative calculations */
398    if (!options->LowerPrecisionDerivatives &&
399        (ir->operation == ir_unop_dFdx ||
400         ir->operation == ir_unop_dFdx_coarse ||
401         ir->operation == ir_unop_dFdx_fine ||
402         ir->operation == ir_unop_dFdy ||
403         ir->operation == ir_unop_dFdy_coarse ||
404         ir->operation == ir_unop_dFdy_fine)) {
405       stack.back().state = CANT_LOWER;
406    }
407 
408    return visit_continue;
409 }
410 
/**
 * Return true if \p name is one of the builtins that handle_call() treats
 * as lowering only the return value, regardless of its parameters. The
 * match is exact and case-sensitive.
 */
static bool
function_always_returns_mediump_or_lowp(const char *name)
{
   static const char *const reduced_precision_builtins[] = {
      "bitCount",
      "findLSB",
      "findMSB",
      "unpackHalf2x16",
      "unpackUnorm4x8",
      "unpackSnorm4x8",
   };

   for (const char *candidate : reduced_precision_builtins) {
      if (strcmp(name, candidate) == 0)
         return true;
   }

   return false;
}
421 
422 static unsigned
handle_call(ir_call * ir,const struct set * lowerable_rvalues)423 handle_call(ir_call *ir, const struct set *lowerable_rvalues)
424 {
425    /* The intrinsic call is inside the wrapper imageLoad function that will
426     * be inlined. We have to handle both of them.
427     */
428    if (ir->callee->intrinsic_id == ir_intrinsic_image_load ||
429        (ir->callee->is_builtin() &&
430         !strcmp(ir->callee_name(), "imageLoad"))) {
431       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
432       ir_variable *resource = param->variable_referenced();
433 
434       assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
435       assert(resource->type->without_array()->is_image());
436 
437       /* GLSL ES 3.20 requires that images have a precision modifier, but if
438        * you set one, it doesn't do anything, because all intrinsics are
439        * defined with highp. This seems to be a spec bug.
440        *
441        * In theory we could set the return value to mediump if the image
442        * format has a lower precision. This appears to be the most sensible
443        * thing to do.
444        */
445       const struct util_format_description *desc =
446          util_format_description(resource->data.image_format);
447       int i =
448          util_format_get_first_non_void_channel(resource->data.image_format);
449       bool mediump;
450 
451       assert(i >= 0);
452 
453       if (desc->channel[i].pure_integer ||
454           desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT)
455          mediump = desc->channel[i].size <= 16;
456       else
457          mediump = desc->channel[i].size <= 10; /* unorm/snorm */
458 
459       return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH;
460    }
461 
462    /* Return the declared precision for user-defined functions. */
463    if (!ir->callee->is_builtin())
464       return ir->callee->return_precision;
465 
466    /* Handle special calls. */
467    if (ir->callee->is_builtin() && ir->actual_parameters.length()) {
468       ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head();
469       ir_variable *var = param->variable_referenced();
470 
471       /* Handle builtin wrappers around ir_texture opcodes. These wrappers will
472        * be inlined by lower_precision() if we return true here, so that we can
473        * get to ir_texture later and do proper lowering.
474        *
475        * We should lower the type of the return value if the sampler type
476        * uses lower precision. The function parameters don't matter.
477        */
478       if (var && var->type->without_array()->is_sampler()) {
479          /* textureSize always returns highp. */
480          if (!strcmp(ir->callee_name(), "textureSize"))
481             return GLSL_PRECISION_HIGH;
482 
483          /* textureGatherOffsets always takes a highp array of constants. As
484           * per the discussion https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16547#note_1393704
485           * trying to lower the precision results in segfault later on
486           * in the compiler as textureGatherOffsets will end up being passed
487           * a temp when its expecting a constant as required by the spec.
488           */
489          if (!strcmp(ir->callee_name(), "textureGatherOffsets"))
490             return GLSL_PRECISION_HIGH;
491 
492          return var->data.precision;
493       }
494    }
495 
496    if (/* Parameters are always highp: */
497        !strcmp(ir->callee_name(), "floatBitsToInt") ||
498        !strcmp(ir->callee_name(), "floatBitsToUint") ||
499        !strcmp(ir->callee_name(), "intBitsToFloat") ||
500        !strcmp(ir->callee_name(), "uintBitsToFloat") ||
501        !strcmp(ir->callee_name(), "bitfieldReverse") ||
502        !strcmp(ir->callee_name(), "frexp") ||
503        !strcmp(ir->callee_name(), "ldexp") ||
504        /* Parameters and outputs are always highp: */
505        /* TODO: The operations are highp, but carry and borrow outputs are lowp. */
506        !strcmp(ir->callee_name(), "uaddCarry") ||
507        !strcmp(ir->callee_name(), "usubBorrow") ||
508        !strcmp(ir->callee_name(), "imulExtended") ||
509        !strcmp(ir->callee_name(), "umulExtended") ||
510        !strcmp(ir->callee_name(), "unpackUnorm2x16") ||
511        !strcmp(ir->callee_name(), "unpackSnorm2x16") ||
512        /* Outputs are highp: */
513        !strcmp(ir->callee_name(), "packUnorm2x16") ||
514        !strcmp(ir->callee_name(), "packSnorm2x16") ||
515        /* Parameters are mediump and outputs are highp. The parameters should
516         * be optimized in NIR, not here, e.g:
517         * - packHalf2x16 can just be a bitcast from f16vec2 to uint32
518         * - Other opcodes don't have to convert parameters to highp if the hw
519         *   has f16 versions. Optimize in NIR accordingly.
520         */
521        !strcmp(ir->callee_name(), "packHalf2x16") ||
522        !strcmp(ir->callee_name(), "packUnorm4x8") ||
523        !strcmp(ir->callee_name(), "packSnorm4x8") ||
524        /* Atomic functions are not lowered. */
525        strstr(ir->callee_name(), "atomic") == ir->callee_name())
526       return GLSL_PRECISION_HIGH;
527 
528    assert(ir->callee->return_precision == GLSL_PRECISION_NONE);
529 
530    /* Number of parameters to check if they are lowerable. */
531    unsigned check_parameters = ir->actual_parameters.length();
532 
533    /* Interpolation functions only consider the precision of the interpolant. */
534    /* Bitfield functions ignore the precision of "offset" and "bits". */
535    if (!strcmp(ir->callee_name(), "interpolateAtOffset") ||
536        !strcmp(ir->callee_name(), "interpolateAtSample") ||
537        !strcmp(ir->callee_name(), "bitfieldExtract")) {
538       check_parameters = 1;
539    } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) {
540       check_parameters = 2;
541    } if (function_always_returns_mediump_or_lowp(ir->callee_name())) {
542       /* These only lower the return value. Parameters keep their precision,
543        * which is preserved in map_builtin.
544        */
545       check_parameters = 0;
546    }
547 
548    /* If the call is to a builtin, then the function won’t have a return
549     * precision and we should determine it from the precision of the arguments.
550     */
551    foreach_in_list(ir_rvalue, param, &ir->actual_parameters) {
552       if (!check_parameters)
553          break;
554 
555       if (!param->as_constant() &&
556           _mesa_set_search(lowerable_rvalues, param) == NULL)
557          return GLSL_PRECISION_HIGH;
558 
559       --check_parameters;
560    }
561 
562    return GLSL_PRECISION_MEDIUM;
563 }
564 
565 ir_visitor_status
visit_leave(ir_call * ir)566 find_lowerable_rvalues_visitor::visit_leave(ir_call *ir)
567 {
568    ir_hierarchical_visitor::visit_leave(ir);
569 
570    /* Special case for handling temporary variables generated by the compiler
571     * for function calls. If we assign to one of these using a function call
572     * that has a lowerable return type then we can assume the temporary
573     * variable should have a medium precision too.
574     */
575 
576    /* Do nothing if the return type is void. */
577    if (!ir->return_deref)
578       return visit_continue;
579 
580    ir_variable *var = ir->return_deref->variable_referenced();
581 
582    assert(var->data.mode == ir_var_temporary);
583 
584    unsigned return_precision = handle_call(ir, lowerable_rvalues);
585 
586    can_lower_state lower_state =
587       handle_precision(var->type, return_precision);
588 
589    if (lower_state == SHOULD_LOWER) {
590       /* There probably shouldn’t be any situations where multiple ir_call
591        * instructions write to the same temporary?
592        */
593       assert(var->data.precision == GLSL_PRECISION_NONE);
594       var->data.precision = GLSL_PRECISION_MEDIUM;
595    } else {
596       var->data.precision = GLSL_PRECISION_HIGH;
597    }
598 
599    return visit_continue;
600 }
601 
602 ir_visitor_status
visit_leave(ir_assignment * ir)603 find_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir)
604 {
605    ir_hierarchical_visitor::visit_leave(ir);
606 
607    /* Special case for handling temporary variables generated by the compiler.
608     * If we assign to one of these using a lowered precision then we can assume
609     * the temporary variable should have a medium precision too.
610     */
611    ir_variable *var = ir->lhs->variable_referenced();
612 
613    if (var->data.mode == ir_var_temporary) {
614       if (_mesa_set_search(lowerable_rvalues, ir->rhs)) {
615          /* Only override the precision if this is the first assignment. For
616           * temporaries such as the ones generated for the ?: operator there
617           * can be multiple assignments with different precisions. This way we
618           * get the highest precision of all of the assignments.
619           */
620          if (var->data.precision == GLSL_PRECISION_NONE)
621             var->data.precision = GLSL_PRECISION_MEDIUM;
622       } else if (!ir->rhs->as_constant()) {
623          var->data.precision = GLSL_PRECISION_HIGH;
624       }
625    }
626 
627    return visit_continue;
628 }
629 
630 void
find_lowerable_rvalues(const struct gl_shader_compiler_options * options,exec_list * instructions,struct set * result)631 find_lowerable_rvalues(const struct gl_shader_compiler_options *options,
632                        exec_list *instructions,
633                        struct set *result)
634 {
635    find_lowerable_rvalues_visitor v(result, options);
636 
637    visit_list_elements(&v, instructions);
638 
639    assert(v.stack.empty());
640 }
641 
642 static const glsl_type *
convert_type(bool up,const glsl_type * type)643 convert_type(bool up, const glsl_type *type)
644 {
645    if (type->is_array()) {
646       return glsl_type::get_array_instance(convert_type(up, type->fields.array),
647                                            type->array_size(),
648                                            type->explicit_stride);
649    }
650 
651    glsl_base_type new_base_type;
652 
653    if (up) {
654       switch (type->base_type) {
655       case GLSL_TYPE_FLOAT16:
656          new_base_type = GLSL_TYPE_FLOAT;
657          break;
658       case GLSL_TYPE_INT16:
659          new_base_type = GLSL_TYPE_INT;
660          break;
661       case GLSL_TYPE_UINT16:
662          new_base_type = GLSL_TYPE_UINT;
663          break;
664       default:
665          unreachable("invalid type");
666          return NULL;
667       }
668    } else {
669       switch (type->base_type) {
670       case GLSL_TYPE_FLOAT:
671          new_base_type = GLSL_TYPE_FLOAT16;
672          break;
673       case GLSL_TYPE_INT:
674          new_base_type = GLSL_TYPE_INT16;
675          break;
676       case GLSL_TYPE_UINT:
677          new_base_type = GLSL_TYPE_UINT16;
678          break;
679       default:
680          unreachable("invalid type");
681          return NULL;
682       }
683    }
684 
685    return glsl_type::get_instance(new_base_type,
686                                   type->vector_elements,
687                                   type->matrix_columns,
688                                   type->explicit_stride,
689                                   type->interface_row_major);
690 }
691 
692 static const glsl_type *
lower_glsl_type(const glsl_type * type)693 lower_glsl_type(const glsl_type *type)
694 {
695    return convert_type(false, type);
696 }
697 
698 static ir_rvalue *
convert_precision(bool up,ir_rvalue * ir)699 convert_precision(bool up, ir_rvalue *ir)
700 {
701    unsigned op;
702 
703    if (up) {
704       switch (ir->type->base_type) {
705       case GLSL_TYPE_FLOAT16:
706          op = ir_unop_f162f;
707          break;
708       case GLSL_TYPE_INT16:
709          op = ir_unop_i2i;
710          break;
711       case GLSL_TYPE_UINT16:
712          op = ir_unop_u2u;
713          break;
714       default:
715          unreachable("invalid type");
716          return NULL;
717       }
718    } else {
719       switch (ir->type->base_type) {
720       case GLSL_TYPE_FLOAT:
721          op = ir_unop_f2fmp;
722          break;
723       case GLSL_TYPE_INT:
724          op = ir_unop_i2imp;
725          break;
726       case GLSL_TYPE_UINT:
727          op = ir_unop_u2ump;
728          break;
729       default:
730          unreachable("invalid type");
731          return NULL;
732       }
733    }
734 
735    const glsl_type *desired_type = convert_type(up, ir->type);
736    void *mem_ctx = ralloc_parent(ir);
737    return new(mem_ctx) ir_expression(op, desired_type, ir, NULL);
738 }
739 
740 void
handle_rvalue(ir_rvalue ** rvalue)741 lower_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
742 {
743    ir_rvalue *ir = *rvalue;
744 
745    if (ir == NULL)
746       return;
747 
748    if (ir->as_dereference()) {
749       if (!ir->type->is_boolean())
750          *rvalue = convert_precision(false, ir);
751    } else if (ir->type->is_32bit()) {
752       ir->type = lower_glsl_type(ir->type);
753 
754       ir_constant *const_ir = ir->as_constant();
755 
756       if (const_ir) {
757          ir_constant_data value;
758 
759          if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
760             for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
761                value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]);
762          } else if (ir->type->base_type == GLSL_TYPE_INT16) {
763             for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
764                value.i16[i] = const_ir->value.i[i];
765          } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
766             for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
767                value.u16[i] = const_ir->value.u[i];
768          } else {
769             unreachable("invalid type");
770          }
771 
772          const_ir->value = value;
773       }
774    }
775 }
776 
777 ir_visitor_status
visit_enter(ir_dereference_record * ir)778 lower_precision_visitor::visit_enter(ir_dereference_record *ir)
779 {
780    /* We don’t want to lower the variable */
781    return visit_continue_with_parent;
782 }
783 
784 ir_visitor_status
visit_enter(ir_dereference_array * ir)785 lower_precision_visitor::visit_enter(ir_dereference_array *ir)
786 {
787    /* We don’t want to convert the array index or the variable. If the array
788     * index itself is lowerable that will be handled separately.
789     */
790    return visit_continue_with_parent;
791 }
792 
793 ir_visitor_status
visit_enter(ir_call * ir)794 lower_precision_visitor::visit_enter(ir_call *ir)
795 {
796    /* We don’t want to convert the arguments. These will be handled separately.
797     */
798    return visit_continue_with_parent;
799 }
800 
801 ir_visitor_status
visit_enter(ir_texture * ir)802 lower_precision_visitor::visit_enter(ir_texture *ir)
803 {
804    /* We don’t want to convert the arguments. These will be handled separately.
805     */
806    return visit_continue_with_parent;
807 }
808 
809 ir_visitor_status
visit_leave(ir_expression * ir)810 lower_precision_visitor::visit_leave(ir_expression *ir)
811 {
812    ir_rvalue_visitor::visit_leave(ir);
813 
814    /* If the expression is a conversion operation to or from bool then fix the
815     * operation.
816     */
817    switch (ir->operation) {
818    case ir_unop_b2f:
819       ir->operation = ir_unop_b2f16;
820       break;
821    case ir_unop_f2b:
822       ir->operation = ir_unop_f162b;
823       break;
824    case ir_unop_b2i:
825    case ir_unop_i2b:
826       /* Nothing to do - they both support int16. */
827       break;
828    default:
829       break;
830    }
831 
832    return visit_continue;
833 }
834 
835 void
handle_rvalue(ir_rvalue ** rvalue)836 find_precision_visitor::handle_rvalue(ir_rvalue **rvalue)
837 {
838    /* Checking the precision of rvalue can be lowered first throughout
839     * find_lowerable_rvalues_visitor.
840     * Once it found the precision of rvalue can be lowered, then we can
841     * add conversion f2fmp, etc. through lower_precision_visitor.
842     */
843    if (*rvalue == NULL)
844       return;
845 
846    struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue);
847 
848    if (!entry)
849       return;
850 
851    _mesa_set_remove(lowerable_rvalues, entry);
852 
853    /* If the entire expression is just a variable dereference then trying to
854     * lower it will just directly add pointless to and from conversions without
855     * any actual operation in-between. Although these will eventually get
856     * optimised out, avoiding generating them here also avoids breaking inout
857     * parameters to functions.
858     */
859    if ((*rvalue)->as_dereference())
860       return;
861 
862    lower_precision_visitor v;
863 
864    (*rvalue)->accept(&v);
865    v.handle_rvalue(rvalue);
866 
867    /* We don’t need to add the final conversion if the final type has been
868     * converted to bool
869     */
870    if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) {
871       *rvalue = convert_precision(true, *rvalue);
872    }
873 }
874 
875 ir_visitor_status
visit_enter(ir_call * ir)876 find_precision_visitor::visit_enter(ir_call *ir)
877 {
878    ir_rvalue_enter_visitor::visit_enter(ir);
879 
880    ir_variable *return_var =
881       ir->return_deref ? ir->return_deref->variable_referenced() : NULL;
882 
883    /* Don't do anything for image_load here. We have only changed the return
884     * value to mediump/lowp, so that following instructions can use reduced
885     * precision.
886     *
887     * The return value type of the intrinsic itself isn't changed here, but
888     * can be changed in NIR if all users use the *2*mp opcode.
889     */
890    if (ir->callee->intrinsic_id == ir_intrinsic_image_load)
891       return visit_continue;
892 
893    /* If this is a call to a builtin and the find_lowerable_rvalues_visitor
894     * overrode the precision of the temporary return variable, then we can
895     * replace the builtin implementation with a lowered version.
896     */
897 
898    if (!ir->callee->is_builtin() ||
899        ir->callee->is_intrinsic() ||
900        return_var == NULL ||
901        (return_var->data.precision != GLSL_PRECISION_MEDIUM &&
902         return_var->data.precision != GLSL_PRECISION_LOW))
903       return visit_continue;
904 
905    ir->callee = map_builtin(ir->callee);
906    ir->generate_inline(ir);
907    ir->remove();
908 
909    return visit_continue_with_parent;
910 }
911 
912 ir_function_signature *
map_builtin(ir_function_signature * sig)913 find_precision_visitor::map_builtin(ir_function_signature *sig)
914 {
915    if (lowered_builtins == NULL) {
916       lowered_builtins = _mesa_pointer_hash_table_create(NULL);
917       clone_ht =_mesa_pointer_hash_table_create(NULL);
918       lowered_builtin_mem_ctx = ralloc_context(NULL);
919    } else {
920       struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig);
921       if (entry)
922          return (ir_function_signature *) entry->data;
923    }
924 
925    ir_function_signature *lowered_sig =
926       sig->clone(lowered_builtin_mem_ctx, clone_ht);
927 
928    /* Functions that always return mediump or lowp should keep their
929     * parameters intact, because they can be highp. NIR can lower
930     * the up-conversion for parameters if needed.
931     */
932    if (!function_always_returns_mediump_or_lowp(sig->function_name())) {
933       foreach_in_list(ir_variable, param, &lowered_sig->parameters) {
934          param->data.precision = GLSL_PRECISION_MEDIUM;
935       }
936    }
937 
938    lower_precision(options, &lowered_sig->body);
939 
940    _mesa_hash_table_clear(clone_ht, NULL);
941 
942    _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig);
943 
944    return lowered_sig;
945 }
946 
find_precision_visitor(const struct gl_shader_compiler_options * options)947 find_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options)
948    : lowerable_rvalues(_mesa_pointer_set_create(NULL)),
949      lowered_builtins(NULL),
950      clone_ht(NULL),
951      lowered_builtin_mem_ctx(NULL),
952      options(options)
953 {
954 }
955 
~find_precision_visitor()956 find_precision_visitor::~find_precision_visitor()
957 {
958    _mesa_set_destroy(lowerable_rvalues, NULL);
959 
960    if (lowered_builtins) {
961       _mesa_hash_table_destroy(lowered_builtins, NULL);
962       _mesa_hash_table_destroy(clone_ht, NULL);
963       ralloc_free(lowered_builtin_mem_ctx);
964    }
965 }
966 
967 /* Lowering opcodes to 16 bits is not enough for programs with control flow
968  * (and the ?: operator, which is represented by if-then-else in the IR),
969  * because temporary variables, which are used for passing values between
970  * code blocks, are not lowered, resulting in 32-bit phis in NIR.
971  *
972  * First change the variable types to 16 bits, then change all ir_dereference
973  * types to 16 bits.
974  */
975 class lower_variables_visitor : public ir_rvalue_enter_visitor {
976 public:
lower_variables_visitor(const struct gl_shader_compiler_options * options)977    lower_variables_visitor(const struct gl_shader_compiler_options *options)
978       : options(options) {
979       lower_vars = _mesa_pointer_set_create(NULL);
980    }
981 
~lower_variables_visitor()982    virtual ~lower_variables_visitor()
983    {
984       _mesa_set_destroy(lower_vars, NULL);
985    }
986 
987    virtual ir_visitor_status visit(ir_variable *var);
988    virtual ir_visitor_status visit_enter(ir_assignment *ir);
989    virtual ir_visitor_status visit_enter(ir_return *ir);
990    virtual ir_visitor_status visit_enter(ir_call *ir);
991    virtual void handle_rvalue(ir_rvalue **rvalue);
992 
993    void fix_types_in_deref_chain(ir_dereference *ir);
994    void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs,
995                                  bool insert_before);
996 
997    const struct gl_shader_compiler_options *options;
998    set *lower_vars;
999 };
1000 
1001 static void
lower_constant(ir_constant * ir)1002 lower_constant(ir_constant *ir)
1003 {
1004    if (ir->type->is_array()) {
1005       for (int i = 0; i < ir->type->array_size(); i++)
1006          lower_constant(ir->get_array_element(i));
1007 
1008       ir->type = lower_glsl_type(ir->type);
1009       return;
1010    }
1011 
1012    ir->type = lower_glsl_type(ir->type);
1013    ir_constant_data value;
1014 
1015    if (ir->type->base_type == GLSL_TYPE_FLOAT16) {
1016       for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++)
1017          value.f16[i] = _mesa_float_to_half(ir->value.f[i]);
1018    } else if (ir->type->base_type == GLSL_TYPE_INT16) {
1019       for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++)
1020          value.i16[i] = ir->value.i[i];
1021    } else if (ir->type->base_type == GLSL_TYPE_UINT16) {
1022       for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++)
1023          value.u16[i] = ir->value.u[i];
1024    } else {
1025       unreachable("invalid type");
1026    }
1027 
1028    ir->value = value;
1029 }
1030 
1031 ir_visitor_status
visit(ir_variable * var)1032 lower_variables_visitor::visit(ir_variable *var)
1033 {
1034    if ((var->data.mode != ir_var_temporary &&
1035         var->data.mode != ir_var_auto &&
1036         /* Lower uniforms but not UBOs. */
1037         (var->data.mode != ir_var_uniform ||
1038          var->is_in_buffer_block() ||
1039          !(options->LowerPrecisionFloat16Uniforms &&
1040            var->type->without_array()->base_type == GLSL_TYPE_FLOAT))) ||
1041        !var->type->without_array()->is_32bit() ||
1042        (var->data.precision != GLSL_PRECISION_MEDIUM &&
1043         var->data.precision != GLSL_PRECISION_LOW) ||
1044        !can_lower_type(options, var->type))
1045       return visit_continue;
1046 
1047    /* Lower constant initializers. */
1048    if (var->constant_value &&
1049        var->type == var->constant_value->type) {
1050       if (!options->LowerPrecisionConstants)
1051          return visit_continue;
1052       var->constant_value =
1053          var->constant_value->clone(ralloc_parent(var), NULL);
1054       lower_constant(var->constant_value);
1055    }
1056 
1057    if (var->constant_initializer &&
1058        var->type == var->constant_initializer->type) {
1059       if (!options->LowerPrecisionConstants)
1060          return visit_continue;
1061       var->constant_initializer =
1062          var->constant_initializer->clone(ralloc_parent(var), NULL);
1063       lower_constant(var->constant_initializer);
1064    }
1065 
1066    var->type = lower_glsl_type(var->type);
1067    _mesa_set_add(lower_vars, var);
1068 
1069    return visit_continue;
1070 }
1071 
1072 void
fix_types_in_deref_chain(ir_dereference * ir)1073 lower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir)
1074 {
1075    assert(ir->type->without_array()->is_32bit());
1076    assert(_mesa_set_search(lower_vars, ir->variable_referenced()));
1077 
1078    /* Fix the type in the dereference node. */
1079    ir->type = lower_glsl_type(ir->type);
1080 
1081    /* If it's an array, fix the types in the whole dereference chain. */
1082    for (ir_dereference_array *deref_array = ir->as_dereference_array();
1083         deref_array;
1084         deref_array = deref_array->array->as_dereference_array()) {
1085       assert(deref_array->array->type->without_array()->is_32bit());
1086       deref_array->array->type = lower_glsl_type(deref_array->array->type);
1087    }
1088 }
1089 
1090 void
convert_split_assignment(ir_dereference * lhs,ir_rvalue * rhs,bool insert_before)1091 lower_variables_visitor::convert_split_assignment(ir_dereference *lhs,
1092                                                   ir_rvalue *rhs,
1093                                                   bool insert_before)
1094 {
1095    void *mem_ctx = ralloc_parent(lhs);
1096 
1097    if (lhs->type->is_array()) {
1098       for (unsigned i = 0; i < lhs->type->length; i++) {
1099          ir_dereference *l, *r;
1100 
1101          l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL),
1102                                                new(mem_ctx) ir_constant(i));
1103          r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL),
1104                                                new(mem_ctx) ir_constant(i));
1105          convert_split_assignment(l, r, insert_before);
1106       }
1107       return;
1108    }
1109 
1110    assert(lhs->type->is_16bit() || lhs->type->is_32bit());
1111    assert(rhs->type->is_16bit() || rhs->type->is_32bit());
1112    assert(lhs->type->is_16bit() != rhs->type->is_16bit());
1113 
1114    ir_assignment *assign =
1115       new(mem_ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs));
1116 
1117    if (insert_before)
1118       base_ir->insert_before(assign);
1119    else
1120       base_ir->insert_after(assign);
1121 }
1122 
/**
 * Make the two sides of an assignment agree once variables have been lowered.
 *
 * Two cases are handled:
 *  - Whole-array assignments between a lowered and a non-lowered variable
 *    (or from a 32-bit constant into a 16-bit variable) are replaced by
 *    converting per-element assignments inserted before the original, which
 *    is then removed.
 *  - For any other assignment to a lowered variable, the LHS dereference is
 *    retyped and a non-array 32-bit RHS is brought down to 16 bits.
 *
 * Unless the assignment was replaced, the base class visit_enter() result is
 * returned.
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_assignment *ir)
{
   ir_dereference *lhs = ir->lhs;
   ir_variable *var = lhs->variable_referenced();
   /* The RHS viewed as a dereference and as a constant; either may be NULL. */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL;
   ir_constant *rhs_const = ir->rhs->as_constant();

   /* Legalize array assignments between lowered and non-lowered variables.
    *
    * The condition holds for an array LHS whose RHS is either a variable with
    * a different 16-bit-ness than the LHS variable, or a 32-bit constant
    * assigned to a 16-bit LHS variable.
    */
   if (lhs->type->is_array() &&
       (rhs_var || rhs_const) &&
       (!rhs_var ||
        (var &&
         var->type->without_array()->is_16bit() !=
         rhs_var->type->without_array()->is_16bit())) &&
       (!rhs_const ||
        (var &&
         var->type->without_array()->is_16bit() &&
         rhs_const->type->without_array()->is_32bit()))) {
      assert(ir->rhs->type->is_array());

      /* Fix array assignments from lowered to non-lowered. */
      if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) {
         fix_types_in_deref_chain(rhs_deref);
         /* Convert to 32 bits for LHS. */
         convert_split_assignment(lhs, rhs_deref, true);
         /* The per-element assignments replace the original one. */
         ir->remove();
         return visit_continue;
      }

      /* Fix array assignments from non-lowered to lowered. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          ir->rhs->type->without_array()->is_32bit()) {
         fix_types_in_deref_chain(lhs);
         /* Convert to 16 bits for LHS. */
         convert_split_assignment(lhs, ir->rhs, true);
         /* The per-element assignments replace the original one. */
         ir->remove();
         return visit_continue;
      }
   }

   /* Fix assignment types. */
   if (var &&
       _mesa_set_search(lower_vars, var)) {
      /* Fix the LHS type. */
      if (lhs->type->without_array()->is_32bit())
         fix_types_in_deref_chain(lhs);

      /* Fix the RHS type if it's a lowered variable. */
      if (rhs_var &&
          _mesa_set_search(lower_vars, rhs_var) &&
          rhs_deref->type->without_array()->is_32bit())
         fix_types_in_deref_chain(rhs_deref);

      /* Fix the RHS type if it's a non-array expression. */
      if (ir->rhs->type->is_32bit()) {
         ir_expression *expr = ir->rhs->as_expression();

         /* Convert the RHS to the LHS type. */
         if (expr &&
             (expr->operation == ir_unop_f162f ||
              expr->operation == ir_unop_i2i ||
              expr->operation == ir_unop_u2u) &&
             expr->operands[0]->type->is_16bit()) {
            /* If there is an "up" conversion, just remove it.
             * This is optional. We could as well execute the else statement and
             * let NIR eliminate the up+down conversions.
             */
            ir->rhs = expr->operands[0];
         } else {
            /* Add a "down" conversion operation to fix the type of RHS. */
            ir->rhs = convert_precision(false, ir->rhs);
         }
      }
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}
1203 
1204 ir_visitor_status
visit_enter(ir_return * ir)1205 lower_variables_visitor::visit_enter(ir_return *ir)
1206 {
1207    void *mem_ctx = ralloc_parent(ir);
1208 
1209    ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL;
1210    if (deref) {
1211       ir_variable *var = deref->variable_referenced();
1212 
1213       /* Fix the type of the return value. */
1214       if (var &&
1215           _mesa_set_search(lower_vars, var) &&
1216           deref->type->without_array()->is_32bit()) {
1217          /* Create a 32-bit temporary variable. */
1218          ir_variable *new_var =
1219             new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
1220          base_ir->insert_before(new_var);
1221 
1222          /* Fix types in dereferences. */
1223          fix_types_in_deref_chain(deref);
1224 
1225          /* Convert to 32 bits for the return value. */
1226          convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
1227                                   deref, true);
1228          ir->value = new(mem_ctx) ir_dereference_variable(new_var);
1229       }
1230    }
1231 
1232    return ir_rvalue_enter_visitor::visit_enter(ir);
1233 }
1234 
handle_rvalue(ir_rvalue ** rvalue)1235 void lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue)
1236 {
1237    ir_rvalue *ir = *rvalue;
1238 
1239    if (in_assignee || ir == NULL)
1240       return;
1241 
1242    ir_expression *expr = ir->as_expression();
1243    ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL;
1244 
1245    /* Remove f2fmp(float16). Same for int16 and uint16. */
1246    if (expr &&
1247        expr_op0_deref &&
1248        (expr->operation == ir_unop_f2fmp ||
1249         expr->operation == ir_unop_i2imp ||
1250         expr->operation == ir_unop_u2ump ||
1251         expr->operation == ir_unop_f2f16 ||
1252         expr->operation == ir_unop_i2i ||
1253         expr->operation == ir_unop_u2u) &&
1254        expr->type->without_array()->is_16bit() &&
1255        expr_op0_deref->type->without_array()->is_32bit() &&
1256        expr_op0_deref->variable_referenced() &&
1257        _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) {
1258       fix_types_in_deref_chain(expr_op0_deref);
1259 
1260       /* Remove f2fmp/i2imp/u2ump. */
1261       *rvalue = expr_op0_deref;
1262       return;
1263    }
1264 
1265    ir_dereference *deref = ir->as_dereference();
1266 
1267    if (deref) {
1268       ir_variable *var = deref->variable_referenced();
1269 
1270       /* var can be NULL if we are dereferencing ir_constant. */
1271       if (var &&
1272           _mesa_set_search(lower_vars, var) &&
1273           deref->type->without_array()->is_32bit()) {
1274          void *mem_ctx = ralloc_parent(ir);
1275 
1276          /* Create a 32-bit temporary variable. */
1277          ir_variable *new_var =
1278             new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary);
1279          base_ir->insert_before(new_var);
1280 
1281          /* Fix types in dereferences. */
1282          fix_types_in_deref_chain(deref);
1283 
1284          /* Convert to 32 bits for the rvalue. */
1285          convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
1286                                   deref, true);
1287          *rvalue = new(mem_ctx) ir_dereference_variable(new_var);
1288       }
1289    }
1290 }
1291 
/**
 * Legalize a call whose actual parameters or return variable have been
 * lowered while the callee still uses 32-bit types.
 *
 * Each affected actual parameter is replaced by a 32-bit temporary, with a
 * converting copy before the call for in/inout parameters and a converting
 * copy back after the call for out/inout parameters. A lowered return
 * variable is handled the same way: the call writes a 32-bit temporary that
 * is converted into the original variable after the call.
 */
ir_visitor_status
lower_variables_visitor::visit_enter(ir_call *ir)
{
   void *mem_ctx = ralloc_parent(ir);

   /* We can't pass 16-bit variables as 32-bit inout/out parameters. */
   foreach_two_lists(formal_node, &ir->callee->parameters,
                     actual_node, &ir->actual_parameters) {
      ir_dereference *param_deref =
         ((ir_rvalue *)actual_node)->as_dereference();
      ir_variable *param = (ir_variable *)formal_node;

      /* Only actual parameters that are dereferences are considered. */
      if (!param_deref)
            continue;

      ir_variable *var = param_deref->variable_referenced();

      /* var can be NULL if we are dereferencing ir_constant. */
      if (var &&
          _mesa_set_search(lower_vars, var) &&
          param->type->without_array()->is_32bit()) {
         fix_types_in_deref_chain(param_deref);

         /* Create a 32-bit temporary variable for the parameter. */
         ir_variable *new_var =
            new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary);
         base_ir->insert_before(new_var);

         /* Replace the parameter. */
         actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var));

         if (param->data.mode == ir_var_function_in ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 32 bits for passing in. A clone of the original
             * dereference is used as the source here; the original node may
             * still be used as the destination of the copy-back below.
             */
            convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var),
                                     param_deref->clone(mem_ctx, NULL), true);
         }
         if (param->data.mode == ir_var_function_out ||
             param->data.mode == ir_var_function_inout) {
            /* Convert to 16 bits after returning. */
            convert_split_assignment(param_deref,
                                     new(mem_ctx) ir_dereference_variable(new_var),
                                     false);
         }
      }
   }

   /* Fix the type of return value dereferences. */
   ir_dereference_variable *ret_deref = ir->return_deref;
   ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL;

   if (ret_var &&
       _mesa_set_search(lower_vars, ret_var) &&
       ret_deref->type->without_array()->is_32bit()) {
      /* Create a 32-bit temporary variable. */
      ir_variable *new_var =
         new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp",
                                  ir_var_temporary);
      base_ir->insert_before(new_var);

      /* Replace the return variable. */
      ret_deref->var = new_var;

      /* Convert to 16 bits after returning. */
      convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var),
                               new(mem_ctx) ir_dereference_variable(new_var),
                               false);
   }

   return ir_rvalue_enter_visitor::visit_enter(ir);
}
1363 
1364 }
1365 
1366 void
lower_precision(const struct gl_shader_compiler_options * options,exec_list * instructions)1367 lower_precision(const struct gl_shader_compiler_options *options,
1368                 exec_list *instructions)
1369 {
1370    find_precision_visitor v(options);
1371    find_lowerable_rvalues(options, instructions, v.lowerable_rvalues);
1372    visit_list_elements(&v, instructions);
1373 
1374    lower_variables_visitor vars(options);
1375    visit_list_elements(&vars, instructions);
1376 }
1377