1 /**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_config.h"
30
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_rect.h"
35 #include "util/u_sse.h"
36
37 #include "lp_jit.h"
38 #include "lp_rast.h"
39 #include "lp_debug.h"
40 #include "lp_state_fs.h"
41 #include "lp_linear_priv.h"
42
43
44 #if defined(PIPE_ARCH_SSE)
45
#define FIXED15_ONE 0x7fff

/* Translate floating point value to 1.15 unsigned fixed-point.
 *
 * The input is scaled by FIXED15_ONE and truncated toward zero.  The
 * result is clamped to [0, FIXED15_ONE]; clamping happens while the value
 * is still a float, because converting a negative or oversized float
 * directly to an unsigned integer is undefined behaviour.
 *
 * f       - value to convert, nominally in the range 0..1
 * returns - fixed-point value in [0, FIXED15_ONE]; NaN maps to 0
 */
static inline uint16_t
float_to_ufixed_1_15(float f)
{
   const float scaled = f * (float)FIXED15_ONE;

   /* Phrased as a negated '>' so that NaN also ends up here. */
   if (!(scaled > 0.0f))
      return 0;

   if (scaled >= (float)FIXED15_ONE)
      return FIXED15_ONE;

   return (uint16_t)scaled;
}
55
56
57 /* Translate floating point value to 1.15 signed fixed-point.
58 */
59 static inline int16_t
float_to_sfixed_1_15(float f)60 float_to_sfixed_1_15(float f)
61 {
62 return CLAMP((signed)(f * (float)FIXED15_ONE), -FIXED15_ONE, FIXED15_ONE);
63 }
64
65
66 /* Interpolate in 1.15 space, but produce a packed row of 0.8 values.
67 */
68 static const uint32_t *
interp_0_8(struct lp_linear_elem * elem)69 interp_0_8(struct lp_linear_elem *elem)
70 {
71 struct lp_linear_interp *interp = (struct lp_linear_interp *)elem;
72 uint32_t *row = interp->row;
73 __m128i a0 = interp->a0;
74 __m128i dadx = interp->dadx;
75 int width = (interp->width + 3) & ~3;
76 int i;
77
78 for (i = 0; i < width; i += 4) {
79 __m128i l, h;
80
81 l = _mm_srai_epi16(a0, 7);
82 a0 = _mm_add_epi16(a0, dadx);
83
84 h = _mm_srai_epi16(a0, 7);
85 a0 = _mm_add_epi16(a0, dadx);
86
87 *(__m128i *)&row[i] = _mm_packus_epi16(l, h);
88 }
89
90 interp->a0 = _mm_add_epi16(interp->a0, interp->dady);
91 return interp->row;
92 }
93
94 static const uint32_t *
interp_noop(struct lp_linear_elem * elem)95 interp_noop(struct lp_linear_elem *elem)
96 {
97 struct lp_linear_interp *interp = (struct lp_linear_interp *)elem;
98 return interp->row;
99 }
100
101
102 static const uint32_t *
interp_check(struct lp_linear_elem * elem)103 interp_check(struct lp_linear_elem *elem)
104 {
105 struct lp_linear_interp *interp = (struct lp_linear_interp *)elem;
106 interp->row[0] = 1;
107 return interp->row;
108 }
109
110 /* Not quite a noop - we use row[0] to track whether this gets called
111 * or not, so we can optimize which interpolants we care about.
112 */
113 void
lp_linear_init_noop_interp(struct lp_linear_interp * interp)114 lp_linear_init_noop_interp(struct lp_linear_interp *interp)
115 {
116 interp->row[0] = 0;
117 interp->base.fetch = interp_check;
118 }
119
120 boolean
lp_linear_init_interp(struct lp_linear_interp * interp,int x,int y,int width,int height,unsigned usage_mask,boolean perspective,float oow,const float * a0,const float * dadx,const float * dady)121 lp_linear_init_interp(struct lp_linear_interp *interp,
122 int x, int y, int width, int height,
123 unsigned usage_mask,
124 boolean perspective,
125 float oow,
126 const float *a0,
127 const float *dadx,
128 const float *dady)
129 {
130 float s0[4];
131 float dsdx[4];
132 float dsdy[4];
133 int16_t s0_fp[8];
134 int16_t dsdx_fp[4];
135 int16_t dsdy_fp[4];
136 int j;
137
138 /* Zero coefficients to avoid using uninitialised values */
139 memset(s0, 0, sizeof(s0));
140 memset(dsdx, 0, sizeof(dsdx));
141 memset(dsdy, 0, sizeof(dsdy));
142 memset(s0_fp, 0, sizeof(s0_fp));
143 memset(dsdx_fp, 0, sizeof(dsdx_fp));
144 memset(dsdy_fp, 0, sizeof(dsdy_fp));
145
146 if (perspective) {
147 for (j = 0; j < 4; j++) {
148 if (usage_mask & (1<<j)) {
149 s0[j] = a0[j] * oow;
150 dsdx[j] = dadx[j] * oow;
151 dsdy[j] = dady[j] * oow;
152 }
153 }
154 } else {
155 for (j = 0; j < 4; j++) {
156 if (usage_mask & (1<<j)) {
157 s0[j] = a0[j];
158 dsdx[j] = dadx[j];
159 dsdy[j] = dady[j];
160 }
161 }
162 }
163
164 s0[0] += x * dsdx[0] + y * dsdy[0];
165 s0[1] += x * dsdx[1] + y * dsdy[1];
166 s0[2] += x * dsdx[2] + y * dsdy[2];
167 s0[3] += x * dsdx[3] + y * dsdy[3];
168
169 /* XXX: lift all of this into the rectangle setup code.
170 *
171 * For rectangles with linear shaders, at setup time:
172 * - if w is constant (else mark as non-fastpath)
173 * - premultiply perspective interpolants by w
174 * - set w = 1 in position
175 * - check all interpolants for min/max 0..1 (else mark as
176 * non-fastpath)
177 */
178 for (j = 0; j < 4; j++) {
179 if (usage_mask & (1<<j)) {
180 float a = s0[j];
181 float b = s0[j] + (width - 1) * dsdx[j];
182 float c = s0[j] + (height - 1) * dsdy[j];
183 float d = s0[j] + (height - 1) * dsdy[j] + (width - 1) * dsdx[j];
184
185 if (MIN4(a,b,c,d) < 0.0)
186 FAIL("min < 0.0");
187
188 if (MAX4(a,b,c,d) > 1.0)
189 FAIL("max > 1.0");
190
191 dsdx_fp[j] = float_to_sfixed_1_15(dsdx[j]);
192 dsdy_fp[j] = float_to_sfixed_1_15(dsdy[j]);
193
194 s0_fp[j] = float_to_ufixed_1_15(s0[j]);
195 s0_fp[j + 4] = s0_fp[j] + dsdx_fp[j];
196
197 dsdx_fp[j] *= 2;
198 }
199 }
200
201 interp->width = align(width, 4);
202
203 interp->a0 = _mm_setr_epi16(s0_fp[2], s0_fp[1], s0_fp[0], s0_fp[3],
204 s0_fp[6], s0_fp[5], s0_fp[4], s0_fp[7]);
205
206 interp->dadx = _mm_setr_epi16(dsdx_fp[2], dsdx_fp[1], dsdx_fp[0], dsdx_fp[3],
207 dsdx_fp[2], dsdx_fp[1], dsdx_fp[0], dsdx_fp[3]);
208
209 interp->dady = _mm_setr_epi16(dsdy_fp[2], dsdy_fp[1], dsdy_fp[0], dsdy_fp[3],
210 dsdy_fp[2], dsdy_fp[1], dsdy_fp[0], dsdy_fp[3]);
211
212 /* If the value is y-invariant, eagerly calculate it here and then
213 * always return the precalculated value.
214 */
215 if (dsdy[0] == 0 &&
216 dsdy[1] == 0 &&
217 dsdy[2] == 0 &&
218 dsdy[3] == 0)
219 {
220 interp_0_8(&interp->base);
221 interp->base.fetch = interp_noop;
222 }
223 else {
224 interp->base.fetch = interp_0_8;
225 }
226
227 return TRUE;
228 }
229
230 #else
/* Variant built when PIPE_ARCH_SSE is not defined.  It ignores all of
 * its arguments, leaves interp untouched and always returns FALSE.
 */
boolean
lp_linear_init_interp(struct lp_linear_interp *interp,
                      int x, int y, int width, int height,
                      unsigned usage_mask,
                      boolean perspective,
                      float oow,
                      const float *a0,
                      const float *dadx,
                      const float *dady)
{
   return FALSE;
}
243 #endif
244