1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 * Author: Tom Stellard <thomas.stellard@amd.com>
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include <stdio.h>
8 #include "util/u_bitcast.h"
9 #include "radeon_compiler.h"
10 #include "radeon_compiler_util.h"
11 #include "radeon_dataflow.h"
12 #include "radeon_program.h"
13 #include "radeon_program_constants.h"
14 #include "radeon_swizzle.h"
15
/* Compile-time switch for the conversion trace below; non-zero enables it. */
#define VERBOSE 0

/* printf-style trace to stderr; nothing is printed unless VERBOSE is non-zero.
 * Wrapped in do/while(0) so it behaves as a single statement. */
#define DBG(...)                                                                                   \
   do {                                                                                            \
      if (VERBOSE) {                                                                               \
         fprintf(stderr, __VA_ARGS__);                                                             \
      }                                                                                            \
   } while (0)
23
/*
 * Try to encode f as an R300 7-bit inline-literal float.
 *
 * IEEE-754 single precision:
 *    31     sign
 *    30:23  exponent (bias 127)
 *    22:0   mantissa
 *
 * R300 inline literal:
 *    6:3    exponent (bias 7)
 *    2:0    mantissa
 *
 * The 7-bit encoding carries no sign, so the sign of f is reported through
 * the return value instead.  A value is accepted only when its unbiased
 * exponent lies in [-7, 8] and its mantissa fits in the top three mantissa
 * bits.  Zeros, denormals, infinities and NaNs all have exponents outside
 * that range and are therefore rejected.
 *
 * r300_float_out receives the encoding on success and is not written on
 * failure.
 *
 * Returns 0 if f cannot be encoded exactly, -1 if it was encoded and its sign
 * bit is set, and 1 if it was encoded and its sign bit is clear.
 */
static int
ieee_754_to_r300_float(float f, unsigned char *r300_float_out)
{
   unsigned float_bits = u_bitcast_f2u(f);
   /* XXX: Handle big-endian */
   unsigned mantissa = float_bits & 0x007fffff;
   unsigned biased_exponent = (float_bits & 0x7f800000) >> 23;
   unsigned negate = !!(float_bits & 0x80000000);
   /* Cast before removing the bias: done in unsigned arithmetic the
    * subtraction wraps for every exponent below 127, and converting that
    * out-of-range value back to int is implementation-defined. */
   int exponent = (int)biased_exponent - 127;
   /* Within the 23-bit mantissa this selects the low 20 bits, i.e. the bits
    * the 3-bit R300 mantissa has no room for. */
   unsigned mantissa_mask = 0xff8fffff;
   unsigned r300_exponent, r300_mantissa;

   DBG("Converting %f (0x%x) to 7-bit:\n", f, float_bits);
   DBG("Raw exponent = %d\n", exponent);

   if (exponent < -7 || exponent > 8) {
      DBG("Failed exponent out of range\n\n");
      return 0;
   }

   if (mantissa & mantissa_mask) {
      DBG("Failed mantissa has too many bits:\n"
          "mantissa=0x%x mantissa_mask=0x%x, and=0x%x\n\n",
          mantissa, mantissa_mask, mantissa & mantissa_mask);
      return 0;
   }

   /* Re-bias the exponent to 7 and keep the three surviving mantissa bits. */
   r300_exponent = exponent + 7;
   r300_mantissa = (mantissa & ~mantissa_mask) >> 20;
   *r300_float_out = r300_mantissa | (r300_exponent << 3);

   DBG("Success! r300_float = 0x%x\n\n", *r300_float_out);

   return negate ? -1 : 1;
}
71
72 void
rc_inline_literals(struct radeon_compiler * c,void * user)73 rc_inline_literals(struct radeon_compiler *c, void *user)
74 {
75 struct rc_instruction *inst;
76
77 for (inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) {
78 const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
79
80 unsigned src_idx;
81 struct rc_constant *constant;
82 float float_value;
83 unsigned char r300_float = 0;
84 int ret;
85
86 /* XXX: Handle presub */
87
88 /* We aren't using rc_for_all_reads_src here, because presub
89 * sources need to be handled differently. */
90 for (src_idx = 0; src_idx < info->NumSrcRegs; src_idx++) {
91 unsigned use_literal = 0;
92 unsigned swz, chan;
93 struct rc_src_register src_reg = inst->U.I.SrcReg[src_idx];
94 if (src_reg.File != RC_FILE_CONSTANT) {
95 continue;
96 }
97 constant = &c->Program.Constants.Constants[src_reg.Index];
98 if (constant->Type != RC_CONSTANT_IMMEDIATE) {
99 continue;
100 }
101 for (chan = 0; chan < 4; chan++) {
102 unsigned char r300_float_tmp;
103 swz = GET_SWZ(src_reg.Swizzle, chan);
104 if (swz >= RC_SWIZZLE_ZERO) {
105 continue;
106 }
107 float_value = constant->u.Immediate[swz];
108 ret = ieee_754_to_r300_float(float_value, &r300_float_tmp);
109 if (!ret || (use_literal && r300_float != r300_float_tmp)) {
110 use_literal = 0;
111 break;
112 }
113
114 if (ret == -1 && src_reg.Abs) {
115 use_literal = 0;
116 break;
117 }
118
119 if (!use_literal) {
120 r300_float = r300_float_tmp;
121 use_literal = 1;
122 }
123
124 /* We can use any swizzle, so if this is ADD it might
125 * be smart to us the same swizzle as the other src uses
126 * so that we potentially enable presubtract later.
127 * Use RC_SWIZZLE_W otherwise, so it will become one of
128 * the alpha sources.
129 */
130 if (info->Opcode == RC_OPCODE_ADD &&
131 GET_SWZ(inst->U.I.SrcReg[1 - src_idx].Swizzle, chan) == chan) {
132 SET_SWZ(src_reg.Swizzle, chan, chan);
133 } else {
134 SET_SWZ(src_reg.Swizzle, chan, RC_SWIZZLE_W);
135 }
136 if (ret == -1) {
137 src_reg.Negate ^= (1 << chan);
138 }
139 }
140
141 src_reg.File = RC_FILE_INLINE;
142 src_reg.Index = r300_float;
143 if (!use_literal || !c->SwizzleCaps->IsNative(inst->U.I.Opcode, src_reg)) {
144 continue;
145 }
146 inst->U.I.SrcReg[src_idx] = src_reg;
147 }
148 }
149 }
150