1 /*
2 * Copyright © 2015 Connor Abbott
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "brw_fs.h"
25 #include "brw_cfg.h"
26 #include "brw_fs_builder.h"
27
28 using namespace brw;
29
30 static bool
supports_type_conversion(const fs_inst * inst)31 supports_type_conversion(const fs_inst *inst) {
32 switch (inst->opcode) {
33 case BRW_OPCODE_MOV:
34 case SHADER_OPCODE_MOV_INDIRECT:
35 return true;
36 case BRW_OPCODE_SEL:
37 return inst->dst.type == get_exec_type(inst);
38 default:
39 /* FIXME: We assume the opcodes don't explicitly mentioned
40 * before just work fine with arbitrary conversions.
41 */
42 return true;
43 }
44 }
45
46 bool
lower_conversions()47 fs_visitor::lower_conversions()
48 {
49 bool progress = false;
50
51 foreach_block_and_inst(block, fs_inst, inst, cfg) {
52 const fs_builder ibld(this, block, inst);
53 fs_reg dst = inst->dst;
54 bool saturate = inst->saturate;
55
56 if (supports_type_conversion(inst)) {
57 if (get_exec_type_size(inst) == 8 && type_sz(inst->dst.type) < 8) {
58 /* From the Broadwell PRM, 3D Media GPGPU, "Double Precision Float to
59 * Single Precision Float":
60 *
61 * The upper Dword of every Qword will be written with undefined
62 * value when converting DF to F.
63 *
64 * So we need to allocate a temporary that's two registers, and then do
65 * a strided MOV to get the lower DWord of every Qword that has the
66 * result.
67 */
68 fs_reg temp = ibld.vgrf(get_exec_type(inst));
69 fs_reg strided_temp = subscript(temp, dst.type, 0);
70
71 assert(inst->size_written == inst->dst.component_size(inst->exec_size));
72 inst->dst = strided_temp;
73 inst->saturate = false;
74 /* As it is an strided destination, we write n-times more being n the
75 * size ratio between source and destination types. Update
76 * size_written accordingly.
77 */
78 inst->size_written = inst->dst.component_size(inst->exec_size);
79 ibld.at(block, inst->next).MOV(dst, strided_temp)->saturate = saturate;
80
81 progress = true;
82 }
83 } else {
84 fs_reg temp0 = ibld.vgrf(get_exec_type(inst));
85
86 assert(inst->size_written == inst->dst.component_size(inst->exec_size));
87 inst->dst = temp0;
88 /* As it is an strided destination, we write n-times more being n the
89 * size ratio between source and destination types. Update
90 * size_written accordingly.
91 */
92 inst->size_written = inst->dst.component_size(inst->exec_size);
93 inst->saturate = false;
94 /* Now, do the conversion to original destination's type. In next iteration,
95 * we will lower it if it is a d2f conversion.
96 */
97 ibld.at(block, inst->next).MOV(dst, temp0)->saturate = saturate;
98
99 progress = true;
100 }
101 }
102
103 if (progress)
104 invalidate_live_intervals();
105
106 return progress;
107 }
108