• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**************************************************************************
2  *
3  * Copyright 2007-2008 VMware, Inc.
4  * All Rights Reserved.
5  * Copyright 2009-2010 VMware, Inc.  All rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * TGSI interpreter/executor.
31  *
32  * Flow control information:
33  *
34  * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35  * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36  * care since a condition may be true for some quad components but false
37  * for other components.
38  *
39  * We basically execute all statements (even if they're in the part of
40  * an IF/ELSE clause that's "not taken") and use a special mask to
41  * control writing to destination registers.  This is the ExecMask.
42  * See store_dest().
43  *
44  * The ExecMask is computed from three other masks (CondMask, LoopMask and
45  * ContMask) which are controlled by the flow control instructions (namely:
46  * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47  *
48  *
49  * Authors:
50  *   Michal Krol
51  *   Brian Paul
52  */
53 
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_half.h"
62 #include "util/u_memory.h"
63 #include "util/u_math.h"
64 #include "util/rounding.h"
65 
66 
67 #define DEBUG_EXECUTION 0
68 
69 
70 #define FAST_MATH 0
71 
72 #define TILE_TOP_LEFT     0
73 #define TILE_TOP_RIGHT    1
74 #define TILE_BOTTOM_LEFT  2
75 #define TILE_BOTTOM_RIGHT 3
76 
/**
 * Storage for TGSI_QUAD_SIZE 64-bit values.  All members alias the same
 * memory; the member used selects how each element is interpreted.
 */
union tgsi_double_channel {
   double d[TGSI_QUAD_SIZE];        /* elements viewed as doubles */
   unsigned u[TGSI_QUAD_SIZE][2];   /* each element viewed as a pair of unsigned ints */
   uint64_t u64[TGSI_QUAD_SIZE];    /* elements viewed as unsigned 64-bit integers */
   int64_t i64[TGSI_QUAD_SIZE];     /* elements viewed as signed 64-bit integers */
};
83 
/**
 * A pair of 64-bit channels.
 * NOTE(review): presumably the xy and zw halves of a four-component
 * register viewed as two 64-bit components -- confirm against the users.
 */
struct tgsi_double_vector {
   union tgsi_double_channel xy;
   union tgsi_double_channel zw;
};
88 
89 static void
micro_abs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)90 micro_abs(union tgsi_exec_channel *dst,
91           const union tgsi_exec_channel *src)
92 {
93    dst->f[0] = fabsf(src->f[0]);
94    dst->f[1] = fabsf(src->f[1]);
95    dst->f[2] = fabsf(src->f[2]);
96    dst->f[3] = fabsf(src->f[3]);
97 }
98 
99 static void
micro_arl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)100 micro_arl(union tgsi_exec_channel *dst,
101           const union tgsi_exec_channel *src)
102 {
103    dst->i[0] = (int)floorf(src->f[0]);
104    dst->i[1] = (int)floorf(src->f[1]);
105    dst->i[2] = (int)floorf(src->f[2]);
106    dst->i[3] = (int)floorf(src->f[3]);
107 }
108 
109 static void
micro_arr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)110 micro_arr(union tgsi_exec_channel *dst,
111           const union tgsi_exec_channel *src)
112 {
113    dst->i[0] = (int)floorf(src->f[0] + 0.5f);
114    dst->i[1] = (int)floorf(src->f[1] + 0.5f);
115    dst->i[2] = (int)floorf(src->f[2] + 0.5f);
116    dst->i[3] = (int)floorf(src->f[3] + 0.5f);
117 }
118 
119 static void
micro_ceil(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)120 micro_ceil(union tgsi_exec_channel *dst,
121            const union tgsi_exec_channel *src)
122 {
123    dst->f[0] = ceilf(src->f[0]);
124    dst->f[1] = ceilf(src->f[1]);
125    dst->f[2] = ceilf(src->f[2]);
126    dst->f[3] = ceilf(src->f[3]);
127 }
128 
129 static void
micro_clamp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)130 micro_clamp(union tgsi_exec_channel *dst,
131             const union tgsi_exec_channel *src0,
132             const union tgsi_exec_channel *src1,
133             const union tgsi_exec_channel *src2)
134 {
135    dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
136    dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
137    dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
138    dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
139 }
140 
141 static void
micro_cmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)142 micro_cmp(union tgsi_exec_channel *dst,
143           const union tgsi_exec_channel *src0,
144           const union tgsi_exec_channel *src1,
145           const union tgsi_exec_channel *src2)
146 {
147    dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
148    dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
149    dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
150    dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
151 }
152 
153 static void
micro_cos(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)154 micro_cos(union tgsi_exec_channel *dst,
155           const union tgsi_exec_channel *src)
156 {
157    dst->f[0] = cosf(src->f[0]);
158    dst->f[1] = cosf(src->f[1]);
159    dst->f[2] = cosf(src->f[2]);
160    dst->f[3] = cosf(src->f[3]);
161 }
162 
163 static void
micro_d2f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)164 micro_d2f(union tgsi_exec_channel *dst,
165           const union tgsi_double_channel *src)
166 {
167    dst->f[0] = (float)src->d[0];
168    dst->f[1] = (float)src->d[1];
169    dst->f[2] = (float)src->d[2];
170    dst->f[3] = (float)src->d[3];
171 }
172 
173 static void
micro_d2i(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)174 micro_d2i(union tgsi_exec_channel *dst,
175           const union tgsi_double_channel *src)
176 {
177    dst->i[0] = (int)src->d[0];
178    dst->i[1] = (int)src->d[1];
179    dst->i[2] = (int)src->d[2];
180    dst->i[3] = (int)src->d[3];
181 }
182 
183 static void
micro_d2u(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)184 micro_d2u(union tgsi_exec_channel *dst,
185           const union tgsi_double_channel *src)
186 {
187    dst->u[0] = (unsigned)src->d[0];
188    dst->u[1] = (unsigned)src->d[1];
189    dst->u[2] = (unsigned)src->d[2];
190    dst->u[3] = (unsigned)src->d[3];
191 }
192 static void
micro_dabs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)193 micro_dabs(union tgsi_double_channel *dst,
194            const union tgsi_double_channel *src)
195 {
196    dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
197    dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
198    dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
199    dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
200 }
201 
202 static void
micro_dadd(union tgsi_double_channel * dst,const union tgsi_double_channel * src)203 micro_dadd(union tgsi_double_channel *dst,
204           const union tgsi_double_channel *src)
205 {
206    dst->d[0] = src[0].d[0] + src[1].d[0];
207    dst->d[1] = src[0].d[1] + src[1].d[1];
208    dst->d[2] = src[0].d[2] + src[1].d[2];
209    dst->d[3] = src[0].d[3] + src[1].d[3];
210 }
211 
212 static void
micro_ddiv(union tgsi_double_channel * dst,const union tgsi_double_channel * src)213 micro_ddiv(union tgsi_double_channel *dst,
214           const union tgsi_double_channel *src)
215 {
216    dst->d[0] = src[0].d[0] / src[1].d[0];
217    dst->d[1] = src[0].d[1] / src[1].d[1];
218    dst->d[2] = src[0].d[2] / src[1].d[2];
219    dst->d[3] = src[0].d[3] / src[1].d[3];
220 }
221 
222 static void
micro_ddx(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)223 micro_ddx(union tgsi_exec_channel *dst,
224           const union tgsi_exec_channel *src)
225 {
226    dst->f[0] =
227    dst->f[1] =
228    dst->f[2] =
229    dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
230 }
231 
232 static void
micro_ddy(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)233 micro_ddy(union tgsi_exec_channel *dst,
234           const union tgsi_exec_channel *src)
235 {
236    dst->f[0] =
237    dst->f[1] =
238    dst->f[2] =
239    dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
240 }
241 
242 static void
micro_dmul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)243 micro_dmul(union tgsi_double_channel *dst,
244            const union tgsi_double_channel *src)
245 {
246    dst->d[0] = src[0].d[0] * src[1].d[0];
247    dst->d[1] = src[0].d[1] * src[1].d[1];
248    dst->d[2] = src[0].d[2] * src[1].d[2];
249    dst->d[3] = src[0].d[3] * src[1].d[3];
250 }
251 
252 static void
micro_dmax(union tgsi_double_channel * dst,const union tgsi_double_channel * src)253 micro_dmax(union tgsi_double_channel *dst,
254            const union tgsi_double_channel *src)
255 {
256    dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
257    dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
258    dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
259    dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
260 }
261 
262 static void
micro_dmin(union tgsi_double_channel * dst,const union tgsi_double_channel * src)263 micro_dmin(union tgsi_double_channel *dst,
264            const union tgsi_double_channel *src)
265 {
266    dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
267    dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
268    dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
269    dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
270 }
271 
272 static void
micro_dneg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)273 micro_dneg(union tgsi_double_channel *dst,
274            const union tgsi_double_channel *src)
275 {
276    dst->d[0] = -src->d[0];
277    dst->d[1] = -src->d[1];
278    dst->d[2] = -src->d[2];
279    dst->d[3] = -src->d[3];
280 }
281 
282 static void
micro_dslt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)283 micro_dslt(union tgsi_double_channel *dst,
284            const union tgsi_double_channel *src)
285 {
286    dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
287    dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
288    dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
289    dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
290 }
291 
292 static void
micro_dsne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)293 micro_dsne(union tgsi_double_channel *dst,
294            const union tgsi_double_channel *src)
295 {
296    dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
297    dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
298    dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
299    dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
300 }
301 
302 static void
micro_dsge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)303 micro_dsge(union tgsi_double_channel *dst,
304            const union tgsi_double_channel *src)
305 {
306    dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
307    dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
308    dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
309    dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
310 }
311 
312 static void
micro_dseq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)313 micro_dseq(union tgsi_double_channel *dst,
314            const union tgsi_double_channel *src)
315 {
316    dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
317    dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
318    dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
319    dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
320 }
321 
322 static void
micro_drcp(union tgsi_double_channel * dst,const union tgsi_double_channel * src)323 micro_drcp(union tgsi_double_channel *dst,
324            const union tgsi_double_channel *src)
325 {
326    dst->d[0] = 1.0 / src->d[0];
327    dst->d[1] = 1.0 / src->d[1];
328    dst->d[2] = 1.0 / src->d[2];
329    dst->d[3] = 1.0 / src->d[3];
330 }
331 
332 static void
micro_dsqrt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)333 micro_dsqrt(union tgsi_double_channel *dst,
334             const union tgsi_double_channel *src)
335 {
336    dst->d[0] = sqrt(src->d[0]);
337    dst->d[1] = sqrt(src->d[1]);
338    dst->d[2] = sqrt(src->d[2]);
339    dst->d[3] = sqrt(src->d[3]);
340 }
341 
342 static void
micro_drsq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)343 micro_drsq(union tgsi_double_channel *dst,
344           const union tgsi_double_channel *src)
345 {
346    dst->d[0] = 1.0 / sqrt(src->d[0]);
347    dst->d[1] = 1.0 / sqrt(src->d[1]);
348    dst->d[2] = 1.0 / sqrt(src->d[2]);
349    dst->d[3] = 1.0 / sqrt(src->d[3]);
350 }
351 
352 static void
micro_dmad(union tgsi_double_channel * dst,const union tgsi_double_channel * src)353 micro_dmad(union tgsi_double_channel *dst,
354            const union tgsi_double_channel *src)
355 {
356    dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
357    dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
358    dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
359    dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
360 }
361 
362 static void
micro_dfrac(union tgsi_double_channel * dst,const union tgsi_double_channel * src)363 micro_dfrac(union tgsi_double_channel *dst,
364             const union tgsi_double_channel *src)
365 {
366    dst->d[0] = src->d[0] - floor(src->d[0]);
367    dst->d[1] = src->d[1] - floor(src->d[1]);
368    dst->d[2] = src->d[2] - floor(src->d[2]);
369    dst->d[3] = src->d[3] - floor(src->d[3]);
370 }
371 
372 static void
micro_dldexp(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)373 micro_dldexp(union tgsi_double_channel *dst,
374              const union tgsi_double_channel *src0,
375              union tgsi_exec_channel *src1)
376 {
377    dst->d[0] = ldexp(src0->d[0], src1->i[0]);
378    dst->d[1] = ldexp(src0->d[1], src1->i[1]);
379    dst->d[2] = ldexp(src0->d[2], src1->i[2]);
380    dst->d[3] = ldexp(src0->d[3], src1->i[3]);
381 }
382 
383 static void
micro_dfracexp(union tgsi_double_channel * dst,union tgsi_exec_channel * dst_exp,const union tgsi_double_channel * src)384 micro_dfracexp(union tgsi_double_channel *dst,
385                union tgsi_exec_channel *dst_exp,
386                const union tgsi_double_channel *src)
387 {
388    dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
389    dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
390    dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
391    dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
392 }
393 
394 static void
micro_exp2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)395 micro_exp2(union tgsi_exec_channel *dst,
396            const union tgsi_exec_channel *src)
397 {
398 #if FAST_MATH
399    dst->f[0] = util_fast_exp2(src->f[0]);
400    dst->f[1] = util_fast_exp2(src->f[1]);
401    dst->f[2] = util_fast_exp2(src->f[2]);
402    dst->f[3] = util_fast_exp2(src->f[3]);
403 #else
404 #if DEBUG
405    /* Inf is okay for this instruction, so clamp it to silence assertions. */
406    uint i;
407    union tgsi_exec_channel clamped;
408 
409    for (i = 0; i < 4; i++) {
410       if (src->f[i] > 127.99999f) {
411          clamped.f[i] = 127.99999f;
412       } else if (src->f[i] < -126.99999f) {
413          clamped.f[i] = -126.99999f;
414       } else {
415          clamped.f[i] = src->f[i];
416       }
417    }
418    src = &clamped;
419 #endif /* DEBUG */
420 
421    dst->f[0] = powf(2.0f, src->f[0]);
422    dst->f[1] = powf(2.0f, src->f[1]);
423    dst->f[2] = powf(2.0f, src->f[2]);
424    dst->f[3] = powf(2.0f, src->f[3]);
425 #endif /* FAST_MATH */
426 }
427 
428 static void
micro_f2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)429 micro_f2d(union tgsi_double_channel *dst,
430           const union tgsi_exec_channel *src)
431 {
432    dst->d[0] = (double)src->f[0];
433    dst->d[1] = (double)src->f[1];
434    dst->d[2] = (double)src->f[2];
435    dst->d[3] = (double)src->f[3];
436 }
437 
438 static void
micro_flr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)439 micro_flr(union tgsi_exec_channel *dst,
440           const union tgsi_exec_channel *src)
441 {
442    dst->f[0] = floorf(src->f[0]);
443    dst->f[1] = floorf(src->f[1]);
444    dst->f[2] = floorf(src->f[2]);
445    dst->f[3] = floorf(src->f[3]);
446 }
447 
448 static void
micro_frc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)449 micro_frc(union tgsi_exec_channel *dst,
450           const union tgsi_exec_channel *src)
451 {
452    dst->f[0] = src->f[0] - floorf(src->f[0]);
453    dst->f[1] = src->f[1] - floorf(src->f[1]);
454    dst->f[2] = src->f[2] - floorf(src->f[2]);
455    dst->f[3] = src->f[3] - floorf(src->f[3]);
456 }
457 
458 static void
micro_i2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)459 micro_i2d(union tgsi_double_channel *dst,
460           const union tgsi_exec_channel *src)
461 {
462    dst->d[0] = (double)src->i[0];
463    dst->d[1] = (double)src->i[1];
464    dst->d[2] = (double)src->i[2];
465    dst->d[3] = (double)src->i[3];
466 }
467 
468 static void
micro_iabs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)469 micro_iabs(union tgsi_exec_channel *dst,
470            const union tgsi_exec_channel *src)
471 {
472    dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
473    dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
474    dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
475    dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
476 }
477 
478 static void
micro_ineg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)479 micro_ineg(union tgsi_exec_channel *dst,
480            const union tgsi_exec_channel *src)
481 {
482    dst->i[0] = -src->i[0];
483    dst->i[1] = -src->i[1];
484    dst->i[2] = -src->i[2];
485    dst->i[3] = -src->i[3];
486 }
487 
488 static void
micro_lg2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)489 micro_lg2(union tgsi_exec_channel *dst,
490           const union tgsi_exec_channel *src)
491 {
492 #if FAST_MATH
493    dst->f[0] = util_fast_log2(src->f[0]);
494    dst->f[1] = util_fast_log2(src->f[1]);
495    dst->f[2] = util_fast_log2(src->f[2]);
496    dst->f[3] = util_fast_log2(src->f[3]);
497 #else
498    dst->f[0] = logf(src->f[0]) * 1.442695f;
499    dst->f[1] = logf(src->f[1]) * 1.442695f;
500    dst->f[2] = logf(src->f[2]) * 1.442695f;
501    dst->f[3] = logf(src->f[3]) * 1.442695f;
502 #endif
503 }
504 
505 static void
micro_lrp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)506 micro_lrp(union tgsi_exec_channel *dst,
507           const union tgsi_exec_channel *src0,
508           const union tgsi_exec_channel *src1,
509           const union tgsi_exec_channel *src2)
510 {
511    dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
512    dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
513    dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
514    dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
515 }
516 
517 static void
micro_mad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)518 micro_mad(union tgsi_exec_channel *dst,
519           const union tgsi_exec_channel *src0,
520           const union tgsi_exec_channel *src1,
521           const union tgsi_exec_channel *src2)
522 {
523    dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
524    dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
525    dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
526    dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
527 }
528 
529 static void
micro_mov(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)530 micro_mov(union tgsi_exec_channel *dst,
531           const union tgsi_exec_channel *src)
532 {
533    dst->u[0] = src->u[0];
534    dst->u[1] = src->u[1];
535    dst->u[2] = src->u[2];
536    dst->u[3] = src->u[3];
537 }
538 
539 static void
micro_rcp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)540 micro_rcp(union tgsi_exec_channel *dst,
541           const union tgsi_exec_channel *src)
542 {
543 #if 0 /* for debugging */
544    assert(src->f[0] != 0.0f);
545    assert(src->f[1] != 0.0f);
546    assert(src->f[2] != 0.0f);
547    assert(src->f[3] != 0.0f);
548 #endif
549    dst->f[0] = 1.0f / src->f[0];
550    dst->f[1] = 1.0f / src->f[1];
551    dst->f[2] = 1.0f / src->f[2];
552    dst->f[3] = 1.0f / src->f[3];
553 }
554 
555 static void
micro_rnd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)556 micro_rnd(union tgsi_exec_channel *dst,
557           const union tgsi_exec_channel *src)
558 {
559    dst->f[0] = _mesa_roundevenf(src->f[0]);
560    dst->f[1] = _mesa_roundevenf(src->f[1]);
561    dst->f[2] = _mesa_roundevenf(src->f[2]);
562    dst->f[3] = _mesa_roundevenf(src->f[3]);
563 }
564 
565 static void
micro_rsq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)566 micro_rsq(union tgsi_exec_channel *dst,
567           const union tgsi_exec_channel *src)
568 {
569 #if 0 /* for debugging */
570    assert(src->f[0] != 0.0f);
571    assert(src->f[1] != 0.0f);
572    assert(src->f[2] != 0.0f);
573    assert(src->f[3] != 0.0f);
574 #endif
575    dst->f[0] = 1.0f / sqrtf(src->f[0]);
576    dst->f[1] = 1.0f / sqrtf(src->f[1]);
577    dst->f[2] = 1.0f / sqrtf(src->f[2]);
578    dst->f[3] = 1.0f / sqrtf(src->f[3]);
579 }
580 
581 static void
micro_sqrt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)582 micro_sqrt(union tgsi_exec_channel *dst,
583            const union tgsi_exec_channel *src)
584 {
585    dst->f[0] = sqrtf(src->f[0]);
586    dst->f[1] = sqrtf(src->f[1]);
587    dst->f[2] = sqrtf(src->f[2]);
588    dst->f[3] = sqrtf(src->f[3]);
589 }
590 
591 static void
micro_seq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)592 micro_seq(union tgsi_exec_channel *dst,
593           const union tgsi_exec_channel *src0,
594           const union tgsi_exec_channel *src1)
595 {
596    dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
597    dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
598    dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
599    dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
600 }
601 
602 static void
micro_sge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)603 micro_sge(union tgsi_exec_channel *dst,
604           const union tgsi_exec_channel *src0,
605           const union tgsi_exec_channel *src1)
606 {
607    dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
608    dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
609    dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
610    dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
611 }
612 
613 static void
micro_sgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)614 micro_sgn(union tgsi_exec_channel *dst,
615           const union tgsi_exec_channel *src)
616 {
617    dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
618    dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
619    dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
620    dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
621 }
622 
623 static void
micro_isgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)624 micro_isgn(union tgsi_exec_channel *dst,
625           const union tgsi_exec_channel *src)
626 {
627    dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
628    dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
629    dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
630    dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
631 }
632 
633 static void
micro_sgt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)634 micro_sgt(union tgsi_exec_channel *dst,
635           const union tgsi_exec_channel *src0,
636           const union tgsi_exec_channel *src1)
637 {
638    dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
639    dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
640    dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
641    dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
642 }
643 
644 static void
micro_sin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)645 micro_sin(union tgsi_exec_channel *dst,
646           const union tgsi_exec_channel *src)
647 {
648    dst->f[0] = sinf(src->f[0]);
649    dst->f[1] = sinf(src->f[1]);
650    dst->f[2] = sinf(src->f[2]);
651    dst->f[3] = sinf(src->f[3]);
652 }
653 
654 static void
micro_sle(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)655 micro_sle(union tgsi_exec_channel *dst,
656           const union tgsi_exec_channel *src0,
657           const union tgsi_exec_channel *src1)
658 {
659    dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
660    dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
661    dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
662    dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
663 }
664 
665 static void
micro_slt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)666 micro_slt(union tgsi_exec_channel *dst,
667           const union tgsi_exec_channel *src0,
668           const union tgsi_exec_channel *src1)
669 {
670    dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
671    dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
672    dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
673    dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
674 }
675 
676 static void
micro_sne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)677 micro_sne(union tgsi_exec_channel *dst,
678           const union tgsi_exec_channel *src0,
679           const union tgsi_exec_channel *src1)
680 {
681    dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
682    dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
683    dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
684    dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
685 }
686 
687 static void
micro_trunc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)688 micro_trunc(union tgsi_exec_channel *dst,
689             const union tgsi_exec_channel *src)
690 {
691    dst->f[0] = truncf(src->f[0]);
692    dst->f[1] = truncf(src->f[1]);
693    dst->f[2] = truncf(src->f[2]);
694    dst->f[3] = truncf(src->f[3]);
695 }
696 
697 static void
micro_u2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)698 micro_u2d(union tgsi_double_channel *dst,
699           const union tgsi_exec_channel *src)
700 {
701    dst->d[0] = (double)src->u[0];
702    dst->d[1] = (double)src->u[1];
703    dst->d[2] = (double)src->u[2];
704    dst->d[3] = (double)src->u[3];
705 }
706 
707 static void
micro_i64abs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)708 micro_i64abs(union tgsi_double_channel *dst,
709              const union tgsi_double_channel *src)
710 {
711    dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
712    dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
713    dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
714    dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
715 }
716 
717 static void
micro_i64sgn(union tgsi_double_channel * dst,const union tgsi_double_channel * src)718 micro_i64sgn(union tgsi_double_channel *dst,
719              const union tgsi_double_channel *src)
720 {
721    dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
722    dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
723    dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
724    dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
725 }
726 
727 static void
micro_i64neg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)728 micro_i64neg(union tgsi_double_channel *dst,
729              const union tgsi_double_channel *src)
730 {
731    dst->i64[0] = -src->i64[0];
732    dst->i64[1] = -src->i64[1];
733    dst->i64[2] = -src->i64[2];
734    dst->i64[3] = -src->i64[3];
735 }
736 
737 static void
micro_u64seq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)738 micro_u64seq(union tgsi_double_channel *dst,
739            const union tgsi_double_channel *src)
740 {
741    dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
742    dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
743    dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
744    dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
745 }
746 
747 static void
micro_u64sne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)748 micro_u64sne(union tgsi_double_channel *dst,
749              const union tgsi_double_channel *src)
750 {
751    dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
752    dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
753    dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
754    dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
755 }
756 
757 static void
micro_i64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)758 micro_i64slt(union tgsi_double_channel *dst,
759              const union tgsi_double_channel *src)
760 {
761    dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
762    dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
763    dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
764    dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
765 }
766 
767 static void
micro_u64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)768 micro_u64slt(union tgsi_double_channel *dst,
769              const union tgsi_double_channel *src)
770 {
771    dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
772    dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
773    dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
774    dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
775 }
776 
777 static void
micro_i64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)778 micro_i64sge(union tgsi_double_channel *dst,
779            const union tgsi_double_channel *src)
780 {
781    dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
782    dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
783    dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
784    dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
785 }
786 
787 static void
micro_u64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)788 micro_u64sge(union tgsi_double_channel *dst,
789              const union tgsi_double_channel *src)
790 {
791    dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
792    dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
793    dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
794    dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
795 }
796 
797 static void
micro_u64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)798 micro_u64max(union tgsi_double_channel *dst,
799              const union tgsi_double_channel *src)
800 {
801    dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
802    dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
803    dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
804    dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
805 }
806 
807 static void
micro_i64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)808 micro_i64max(union tgsi_double_channel *dst,
809              const union tgsi_double_channel *src)
810 {
811    dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
812    dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
813    dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
814    dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
815 }
816 
817 static void
micro_u64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)818 micro_u64min(union tgsi_double_channel *dst,
819              const union tgsi_double_channel *src)
820 {
821    dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
822    dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
823    dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
824    dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
825 }
826 
827 static void
micro_i64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)828 micro_i64min(union tgsi_double_channel *dst,
829              const union tgsi_double_channel *src)
830 {
831    dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
832    dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
833    dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
834    dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
835 }
836 
837 static void
micro_u64add(union tgsi_double_channel * dst,const union tgsi_double_channel * src)838 micro_u64add(union tgsi_double_channel *dst,
839              const union tgsi_double_channel *src)
840 {
841    dst->u64[0] = src[0].u64[0] + src[1].u64[0];
842    dst->u64[1] = src[0].u64[1] + src[1].u64[1];
843    dst->u64[2] = src[0].u64[2] + src[1].u64[2];
844    dst->u64[3] = src[0].u64[3] + src[1].u64[3];
845 }
846 
847 static void
micro_u64mul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)848 micro_u64mul(union tgsi_double_channel *dst,
849              const union tgsi_double_channel *src)
850 {
851    dst->u64[0] = src[0].u64[0] * src[1].u64[0];
852    dst->u64[1] = src[0].u64[1] * src[1].u64[1];
853    dst->u64[2] = src[0].u64[2] * src[1].u64[2];
854    dst->u64[3] = src[0].u64[3] * src[1].u64[3];
855 }
856 
857 static void
micro_u64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)858 micro_u64div(union tgsi_double_channel *dst,
859              const union tgsi_double_channel *src)
860 {
861    dst->u64[0] = src[0].u64[0] / src[1].u64[0];
862    dst->u64[1] = src[0].u64[1] / src[1].u64[1];
863    dst->u64[2] = src[0].u64[2] / src[1].u64[2];
864    dst->u64[3] = src[0].u64[3] / src[1].u64[3];
865 }
866 
867 static void
micro_i64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)868 micro_i64div(union tgsi_double_channel *dst,
869              const union tgsi_double_channel *src)
870 {
871    dst->i64[0] = src[0].i64[0] / src[1].i64[0];
872    dst->i64[1] = src[0].i64[1] / src[1].i64[1];
873    dst->i64[2] = src[0].i64[2] / src[1].i64[2];
874    dst->i64[3] = src[0].i64[3] / src[1].i64[3];
875 }
876 
877 static void
micro_u64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)878 micro_u64mod(union tgsi_double_channel *dst,
879              const union tgsi_double_channel *src)
880 {
881    dst->u64[0] = src[0].u64[0] % src[1].u64[0];
882    dst->u64[1] = src[0].u64[1] % src[1].u64[1];
883    dst->u64[2] = src[0].u64[2] % src[1].u64[2];
884    dst->u64[3] = src[0].u64[3] % src[1].u64[3];
885 }
886 
887 static void
micro_i64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)888 micro_i64mod(union tgsi_double_channel *dst,
889              const union tgsi_double_channel *src)
890 {
891    dst->i64[0] = src[0].i64[0] % src[1].i64[0];
892    dst->i64[1] = src[0].i64[1] % src[1].i64[1];
893    dst->i64[2] = src[0].i64[2] % src[1].i64[2];
894    dst->i64[3] = src[0].i64[3] % src[1].i64[3];
895 }
896 
897 static void
micro_u64shl(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)898 micro_u64shl(union tgsi_double_channel *dst,
899              const union tgsi_double_channel *src0,
900              union tgsi_exec_channel *src1)
901 {
902    unsigned masked_count;
903    masked_count = src1->u[0] & 0x3f;
904    dst->u64[0] = src0->u64[0] << masked_count;
905    masked_count = src1->u[1] & 0x3f;
906    dst->u64[1] = src0->u64[1] << masked_count;
907    masked_count = src1->u[2] & 0x3f;
908    dst->u64[2] = src0->u64[2] << masked_count;
909    masked_count = src1->u[3] & 0x3f;
910    dst->u64[3] = src0->u64[3] << masked_count;
911 }
912 
913 static void
micro_i64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)914 micro_i64shr(union tgsi_double_channel *dst,
915              const union tgsi_double_channel *src0,
916              union tgsi_exec_channel *src1)
917 {
918    unsigned masked_count;
919    masked_count = src1->u[0] & 0x3f;
920    dst->i64[0] = src0->i64[0] >> masked_count;
921    masked_count = src1->u[1] & 0x3f;
922    dst->i64[1] = src0->i64[1] >> masked_count;
923    masked_count = src1->u[2] & 0x3f;
924    dst->i64[2] = src0->i64[2] >> masked_count;
925    masked_count = src1->u[3] & 0x3f;
926    dst->i64[3] = src0->i64[3] >> masked_count;
927 }
928 
929 static void
micro_u64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)930 micro_u64shr(union tgsi_double_channel *dst,
931              const union tgsi_double_channel *src0,
932              union tgsi_exec_channel *src1)
933 {
934    unsigned masked_count;
935    masked_count = src1->u[0] & 0x3f;
936    dst->u64[0] = src0->u64[0] >> masked_count;
937    masked_count = src1->u[1] & 0x3f;
938    dst->u64[1] = src0->u64[1] >> masked_count;
939    masked_count = src1->u[2] & 0x3f;
940    dst->u64[2] = src0->u64[2] >> masked_count;
941    masked_count = src1->u[3] & 0x3f;
942    dst->u64[3] = src0->u64[3] >> masked_count;
943 }
944 
/**
 * Data type tags handed to the operand fetch helpers (for example the
 * src_datatype argument of fetch_source_d()).
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT  = 0,
   TGSI_EXEC_DATA_INT    = 1,
   TGSI_EXEC_DATA_UINT   = 2,
   TGSI_EXEC_DATA_DOUBLE = 3,
   TGSI_EXEC_DATA_INT64  = 4,
   TGSI_EXEC_DATA_UINT64 = 5
};
953 
/*
 * Short aliases for the locations of utility registers.
 * Each register is named by a pair: _I = Index, _C = Channel.
 */

/* kill mask */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C

/* output */
#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C

/* primitive */
#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
963 
964 
/**
 * Recompute the execution mask as the bitwise AND of every mask that
 * gates execution: CondMask, LoopMask, ContMask, Switch.mask and FuncMask.
 *
 * MACH is a pointer to the machine.  It is parenthesized so any pointer
 * expression may be passed, but it is evaluated several times, so it must
 * not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
      ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                          (MACH)->ContMask & (MACH)->Switch.mask & \
                          (MACH)->FuncMask)
968 
969 
970 static const union tgsi_exec_channel ZeroVec =
971    { { 0.0, 0.0, 0.0, 0.0 } };
972 
973 static const union tgsi_exec_channel OneVec = {
974    {1.0f, 1.0f, 1.0f, 1.0f}
975 };
976 
977 static const union tgsi_exec_channel P128Vec = {
978    {128.0f, 128.0f, 128.0f, 128.0f}
979 };
980 
981 static const union tgsi_exec_channel M128Vec = {
982    {-128.0f, -128.0f, -128.0f, -128.0f}
983 };
984 
985 
986 /**
987  * Assert that none of the float values in 'chan' are infinite or NaN.
988  * NaN and Inf may occur normally during program execution and should
989  * not lead to crashes, etc.  But when debugging, it's helpful to catch
990  * them.
991  */
992 static inline void
check_inf_or_nan(const union tgsi_exec_channel * chan)993 check_inf_or_nan(const union tgsi_exec_channel *chan)
994 {
995    assert(!util_is_inf_or_nan((chan)->f[0]));
996    assert(!util_is_inf_or_nan((chan)->f[1]));
997    assert(!util_is_inf_or_nan((chan)->f[2]));
998    assert(!util_is_inf_or_nan((chan)->f[3]));
999 }
1000 
1001 
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of 'chan', prefixed by 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
1010 
1011 
#ifdef DEBUG
/**
 * Debug helper: print temporary register 'index' of 'mach', one line per
 * X/Y/Z/W channel with the four float lanes of that channel.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char chan_names[] = "XYZW";
   const struct tgsi_exec_vector *vec = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);
   for (c = 0; c < 4; c++) {
      const union tgsi_exec_channel *chan = &vec->xyzw[c];

      debug_printf("  %c: { %f, %f, %f, %f }\n",
                   chan_names[c],
                   chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
   }
}
#endif
1029 
1030 
1031 void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine * mach,unsigned num_bufs,const void ** bufs,const unsigned * buf_sizes)1032 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1033                                unsigned num_bufs,
1034                                const void **bufs,
1035                                const unsigned *buf_sizes)
1036 {
1037    unsigned i;
1038 
1039    for (i = 0; i < num_bufs; i++) {
1040       mach->Consts[i] = bufs[i];
1041       mach->ConstsSize[i] = buf_sizes[i];
1042    }
1043 }
1044 
1045 
1046 /**
1047  * Check if there's a potential src/dst register data dependency when
1048  * using SOA execution.
1049  * Example:
1050  *   MOV T, T.yxwz;
1051  * This would expand into:
1052  *   MOV t0, t1;
1053  *   MOV t1, t0;
1054  *   MOV t2, t3;
1055  *   MOV t3, t2;
1056  * The second instruction will have the wrong value for t0 if executed as-is.
1057  */
1058 boolean
tgsi_check_soa_dependencies(const struct tgsi_full_instruction * inst)1059 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
1060 {
1061    uint i, chan;
1062 
1063    uint writemask = inst->Dst[0].Register.WriteMask;
1064    if (writemask == TGSI_WRITEMASK_X ||
1065        writemask == TGSI_WRITEMASK_Y ||
1066        writemask == TGSI_WRITEMASK_Z ||
1067        writemask == TGSI_WRITEMASK_W ||
1068        writemask == TGSI_WRITEMASK_NONE) {
1069       /* no chance of data dependency */
1070       return FALSE;
1071    }
1072 
1073    /* loop over src regs */
1074    for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1075       if ((inst->Src[i].Register.File ==
1076            inst->Dst[0].Register.File) &&
1077           ((inst->Src[i].Register.Index ==
1078             inst->Dst[0].Register.Index) ||
1079            inst->Src[i].Register.Indirect ||
1080            inst->Dst[0].Register.Indirect)) {
1081          /* loop over dest channels */
1082          uint channelsWritten = 0x0;
1083          for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1084             if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1085                /* check if we're reading a channel that's been written */
1086                uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
1087                if (channelsWritten & (1 << swizzle)) {
1088                   return TRUE;
1089                }
1090 
1091                channelsWritten |= (1 << chan);
1092             }
1093          }
1094       }
1095    }
1096    return FALSE;
1097 }
1098 
1099 
1100 /**
1101  * Initialize machine state by expanding tokens to full instructions,
1102  * allocating temporary storage, setting up constants, etc.
1103  * After this, we can call tgsi_exec_machine_run() many times.
1104  */
1105 void
tgsi_exec_machine_bind_shader(struct tgsi_exec_machine * mach,const struct tgsi_token * tokens,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)1106 tgsi_exec_machine_bind_shader(
1107    struct tgsi_exec_machine *mach,
1108    const struct tgsi_token *tokens,
1109    struct tgsi_sampler *sampler,
1110    struct tgsi_image *image,
1111    struct tgsi_buffer *buffer)
1112 {
1113    uint k;
1114    struct tgsi_parse_context parse;
1115    struct tgsi_full_instruction *instructions;
1116    struct tgsi_full_declaration *declarations;
1117    uint maxInstructions = 10, numInstructions = 0;
1118    uint maxDeclarations = 10, numDeclarations = 0;
1119 
1120 #if 0
1121    tgsi_dump(tokens, 0);
1122 #endif
1123 
1124    util_init_math();
1125 
1126 
1127    mach->Tokens = tokens;
1128    mach->Sampler = sampler;
1129    mach->Image = image;
1130    mach->Buffer = buffer;
1131 
1132    if (!tokens) {
1133       /* unbind and free all */
1134       FREE(mach->Declarations);
1135       mach->Declarations = NULL;
1136       mach->NumDeclarations = 0;
1137 
1138       FREE(mach->Instructions);
1139       mach->Instructions = NULL;
1140       mach->NumInstructions = 0;
1141 
1142       return;
1143    }
1144 
1145    k = tgsi_parse_init (&parse, mach->Tokens);
1146    if (k != TGSI_PARSE_OK) {
1147       debug_printf( "Problem parsing!\n" );
1148       return;
1149    }
1150 
1151    mach->ImmLimit = 0;
1152    mach->NumOutputs = 0;
1153 
1154    for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1155       mach->SysSemanticToIndex[k] = -1;
1156 
1157    if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1158        !mach->UsedGeometryShader) {
1159       struct tgsi_exec_vector *inputs;
1160       struct tgsi_exec_vector *outputs;
1161 
1162       inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1163                             TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1164                             16);
1165 
1166       if (!inputs)
1167          return;
1168 
1169       outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1170                              TGSI_MAX_TOTAL_VERTICES, 16);
1171 
1172       if (!outputs) {
1173          align_free(inputs);
1174          return;
1175       }
1176 
1177       align_free(mach->Inputs);
1178       align_free(mach->Outputs);
1179 
1180       mach->Inputs = inputs;
1181       mach->Outputs = outputs;
1182       mach->UsedGeometryShader = TRUE;
1183    }
1184 
1185    declarations = (struct tgsi_full_declaration *)
1186       MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1187 
1188    if (!declarations) {
1189       return;
1190    }
1191 
1192    instructions = (struct tgsi_full_instruction *)
1193       MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1194 
1195    if (!instructions) {
1196       FREE( declarations );
1197       return;
1198    }
1199 
1200    while( !tgsi_parse_end_of_tokens( &parse ) ) {
1201       uint i;
1202 
1203       tgsi_parse_token( &parse );
1204       switch( parse.FullToken.Token.Type ) {
1205       case TGSI_TOKEN_TYPE_DECLARATION:
1206          /* save expanded declaration */
1207          if (numDeclarations == maxDeclarations) {
1208             declarations = REALLOC(declarations,
1209                                    maxDeclarations
1210                                    * sizeof(struct tgsi_full_declaration),
1211                                    (maxDeclarations + 10)
1212                                    * sizeof(struct tgsi_full_declaration));
1213             maxDeclarations += 10;
1214          }
1215          if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
1216             unsigned reg;
1217             for (reg = parse.FullToken.FullDeclaration.Range.First;
1218                  reg <= parse.FullToken.FullDeclaration.Range.Last;
1219                  ++reg) {
1220                ++mach->NumOutputs;
1221             }
1222          }
1223          else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1224             const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1225             mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1226          }
1227 
1228          memcpy(declarations + numDeclarations,
1229                 &parse.FullToken.FullDeclaration,
1230                 sizeof(declarations[0]));
1231          numDeclarations++;
1232          break;
1233 
1234       case TGSI_TOKEN_TYPE_IMMEDIATE:
1235          {
1236             uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1237             assert( size <= 4 );
1238             assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
1239 
1240             for( i = 0; i < size; i++ ) {
1241                mach->Imms[mach->ImmLimit][i] =
1242 		  parse.FullToken.FullImmediate.u[i].Float;
1243             }
1244             mach->ImmLimit += 1;
1245          }
1246          break;
1247 
1248       case TGSI_TOKEN_TYPE_INSTRUCTION:
1249 
1250          /* save expanded instruction */
1251          if (numInstructions == maxInstructions) {
1252             instructions = REALLOC(instructions,
1253                                    maxInstructions
1254                                    * sizeof(struct tgsi_full_instruction),
1255                                    (maxInstructions + 10)
1256                                    * sizeof(struct tgsi_full_instruction));
1257             maxInstructions += 10;
1258          }
1259 
1260          memcpy(instructions + numInstructions,
1261                 &parse.FullToken.FullInstruction,
1262                 sizeof(instructions[0]));
1263 
1264          numInstructions++;
1265          break;
1266 
1267       case TGSI_TOKEN_TYPE_PROPERTY:
1268          if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1269             if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1270                mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1271             }
1272          }
1273          break;
1274 
1275       default:
1276          assert( 0 );
1277       }
1278    }
1279    tgsi_parse_free (&parse);
1280 
1281    FREE(mach->Declarations);
1282    mach->Declarations = declarations;
1283    mach->NumDeclarations = numDeclarations;
1284 
1285    FREE(mach->Instructions);
1286    mach->Instructions = instructions;
1287    mach->NumInstructions = numInstructions;
1288 }
1289 
1290 
1291 struct tgsi_exec_machine *
tgsi_exec_machine_create(enum pipe_shader_type shader_type)1292 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1293 {
1294    struct tgsi_exec_machine *mach;
1295    uint i;
1296 
1297    mach = align_malloc( sizeof *mach, 16 );
1298    if (!mach)
1299       goto fail;
1300 
1301    memset(mach, 0, sizeof(*mach));
1302 
1303    mach->ShaderType = shader_type;
1304    mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
1305    mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
1306    mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
1307 
1308    if (shader_type != PIPE_SHADER_COMPUTE) {
1309       mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1310       mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1311       if (!mach->Inputs || !mach->Outputs)
1312          goto fail;
1313    }
1314 
1315    /* Setup constants needed by the SSE2 executor. */
1316    for( i = 0; i < 4; i++ ) {
1317       mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
1318       mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
1319       mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
1320       mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF;    /* not used */
1321       mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
1322       mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f;    /* not used */
1323       mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
1324       mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
1325       mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
1326       mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
1327    }
1328 
1329 #ifdef DEBUG
1330    /* silence warnings */
1331    (void) print_chan;
1332    (void) print_temp;
1333 #endif
1334 
1335    return mach;
1336 
1337 fail:
1338    if (mach) {
1339       align_free(mach->Inputs);
1340       align_free(mach->Outputs);
1341       align_free(mach);
1342    }
1343    return NULL;
1344 }
1345 
1346 
1347 void
tgsi_exec_machine_destroy(struct tgsi_exec_machine * mach)1348 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1349 {
1350    if (mach) {
1351       FREE(mach->Instructions);
1352       FREE(mach->Declarations);
1353 
1354       align_free(mach->Inputs);
1355       align_free(mach->Outputs);
1356 
1357       align_free(mach);
1358    }
1359 }
1360 
1361 static void
micro_add(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1362 micro_add(union tgsi_exec_channel *dst,
1363           const union tgsi_exec_channel *src0,
1364           const union tgsi_exec_channel *src1)
1365 {
1366    dst->f[0] = src0->f[0] + src1->f[0];
1367    dst->f[1] = src0->f[1] + src1->f[1];
1368    dst->f[2] = src0->f[2] + src1->f[2];
1369    dst->f[3] = src0->f[3] + src1->f[3];
1370 }
1371 
1372 static void
micro_div(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1373 micro_div(
1374    union tgsi_exec_channel *dst,
1375    const union tgsi_exec_channel *src0,
1376    const union tgsi_exec_channel *src1 )
1377 {
1378    if (src1->f[0] != 0) {
1379       dst->f[0] = src0->f[0] / src1->f[0];
1380    }
1381    if (src1->f[1] != 0) {
1382       dst->f[1] = src0->f[1] / src1->f[1];
1383    }
1384    if (src1->f[2] != 0) {
1385       dst->f[2] = src0->f[2] / src1->f[2];
1386    }
1387    if (src1->f[3] != 0) {
1388       dst->f[3] = src0->f[3] / src1->f[3];
1389    }
1390 }
1391 
1392 static void
micro_lt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)1393 micro_lt(
1394    union tgsi_exec_channel *dst,
1395    const union tgsi_exec_channel *src0,
1396    const union tgsi_exec_channel *src1,
1397    const union tgsi_exec_channel *src2,
1398    const union tgsi_exec_channel *src3 )
1399 {
1400    dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1401    dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1402    dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1403    dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1404 }
1405 
1406 static void
micro_max(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1407 micro_max(union tgsi_exec_channel *dst,
1408           const union tgsi_exec_channel *src0,
1409           const union tgsi_exec_channel *src1)
1410 {
1411    dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
1412    dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
1413    dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
1414    dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
1415 }
1416 
1417 static void
micro_min(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1418 micro_min(union tgsi_exec_channel *dst,
1419           const union tgsi_exec_channel *src0,
1420           const union tgsi_exec_channel *src1)
1421 {
1422    dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
1423    dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
1424    dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
1425    dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
1426 }
1427 
1428 static void
micro_mul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1429 micro_mul(union tgsi_exec_channel *dst,
1430           const union tgsi_exec_channel *src0,
1431           const union tgsi_exec_channel *src1)
1432 {
1433    dst->f[0] = src0->f[0] * src1->f[0];
1434    dst->f[1] = src0->f[1] * src1->f[1];
1435    dst->f[2] = src0->f[2] * src1->f[2];
1436    dst->f[3] = src0->f[3] * src1->f[3];
1437 }
1438 
1439 static void
micro_neg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)1440 micro_neg(
1441    union tgsi_exec_channel *dst,
1442    const union tgsi_exec_channel *src )
1443 {
1444    dst->f[0] = -src->f[0];
1445    dst->f[1] = -src->f[1];
1446    dst->f[2] = -src->f[2];
1447    dst->f[3] = -src->f[3];
1448 }
1449 
1450 static void
micro_pow(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1451 micro_pow(
1452    union tgsi_exec_channel *dst,
1453    const union tgsi_exec_channel *src0,
1454    const union tgsi_exec_channel *src1 )
1455 {
1456 #if FAST_MATH
1457    dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1458    dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1459    dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1460    dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1461 #else
1462    dst->f[0] = powf( src0->f[0], src1->f[0] );
1463    dst->f[1] = powf( src0->f[1], src1->f[1] );
1464    dst->f[2] = powf( src0->f[2], src1->f[2] );
1465    dst->f[3] = powf( src0->f[3], src1->f[3] );
1466 #endif
1467 }
1468 
1469 static void
micro_sub(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1470 micro_sub(union tgsi_exec_channel *dst,
1471           const union tgsi_exec_channel *src0,
1472           const union tgsi_exec_channel *src1)
1473 {
1474    dst->f[0] = src0->f[0] - src1->f[0];
1475    dst->f[1] = src0->f[1] - src1->f[1];
1476    dst->f[2] = src0->f[2] - src1->f[2];
1477    dst->f[3] = src0->f[3] - src1->f[3];
1478 }
1479 
1480 static void
fetch_src_file_channel(const struct tgsi_exec_machine * mach,const uint chan_index,const uint file,const uint swizzle,const union tgsi_exec_channel * index,const union tgsi_exec_channel * index2D,union tgsi_exec_channel * chan)1481 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1482                        const uint chan_index,
1483                        const uint file,
1484                        const uint swizzle,
1485                        const union tgsi_exec_channel *index,
1486                        const union tgsi_exec_channel *index2D,
1487                        union tgsi_exec_channel *chan)
1488 {
1489    uint i;
1490 
1491    assert(swizzle < 4);
1492 
1493    switch (file) {
1494    case TGSI_FILE_CONSTANT:
1495       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1496          assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1497          assert(mach->Consts[index2D->i[i]]);
1498 
1499          if (index->i[i] < 0) {
1500             chan->u[i] = 0;
1501          } else {
1502             /* NOTE: copying the const value as a uint instead of float */
1503             const uint constbuf = index2D->i[i];
1504             const uint *buf = (const uint *)mach->Consts[constbuf];
1505             const int pos = index->i[i] * 4 + swizzle;
1506             /* const buffer bounds check */
1507             if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1508                if (0) {
1509                   /* Debug: print warning */
1510                   static int count = 0;
1511                   if (count++ < 100)
1512                      debug_printf("TGSI Exec: const buffer index %d"
1513                                   " out of bounds\n", pos);
1514                }
1515                chan->u[i] = 0;
1516             }
1517             else
1518                chan->u[i] = buf[pos];
1519          }
1520       }
1521       break;
1522 
1523    case TGSI_FILE_INPUT:
1524       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1525          /*
1526          if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1527             debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1528                          index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1529                          index2D->i[i], index->i[i]);
1530                          }*/
1531          int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1532          assert(pos >= 0);
1533          assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1534          chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1535       }
1536       break;
1537 
1538    case TGSI_FILE_SYSTEM_VALUE:
1539       /* XXX no swizzling at this point.  Will be needed if we put
1540        * gl_FragCoord, for example, in a sys value register.
1541        */
1542       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1543          chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1544       }
1545       break;
1546 
1547    case TGSI_FILE_TEMPORARY:
1548       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1549          assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1550          assert(index2D->i[i] == 0);
1551 
1552          chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1553       }
1554       break;
1555 
1556    case TGSI_FILE_IMMEDIATE:
1557       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1558          assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1559          assert(index2D->i[i] == 0);
1560 
1561          chan->f[i] = mach->Imms[index->i[i]][swizzle];
1562       }
1563       break;
1564 
1565    case TGSI_FILE_ADDRESS:
1566       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1567          assert(index->i[i] >= 0);
1568          assert(index2D->i[i] == 0);
1569 
1570          chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1571       }
1572       break;
1573 
1574    case TGSI_FILE_PREDICATE:
1575       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1576          assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1577          assert(index2D->i[i] == 0);
1578 
1579          chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1580       }
1581       break;
1582 
1583    case TGSI_FILE_OUTPUT:
1584       /* vertex/fragment output vars can be read too */
1585       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1586          assert(index->i[i] >= 0);
1587          assert(index2D->i[i] == 0);
1588 
1589          chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1590       }
1591       break;
1592 
1593    default:
1594       assert(0);
1595       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1596          chan->u[i] = 0;
1597       }
1598    }
1599 }
1600 
1601 static void
fetch_source_d(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index,enum tgsi_exec_datatype src_datatype)1602 fetch_source_d(const struct tgsi_exec_machine *mach,
1603                union tgsi_exec_channel *chan,
1604                const struct tgsi_full_src_register *reg,
1605                const uint chan_index,
1606                enum tgsi_exec_datatype src_datatype)
1607 {
1608    union tgsi_exec_channel index;
1609    union tgsi_exec_channel index2D;
1610    uint swizzle;
1611 
1612    /* We start with a direct index into a register file.
1613     *
1614     *    file[1],
1615     *    where:
1616     *       file = Register.File
1617     *       [1] = Register.Index
1618     */
1619    index.i[0] =
1620    index.i[1] =
1621    index.i[2] =
1622    index.i[3] = reg->Register.Index;
1623 
1624    /* There is an extra source register that indirectly subscripts
1625     * a register file. The direct index now becomes an offset
1626     * that is being added to the indirect register.
1627     *
1628     *    file[ind[2].x+1],
1629     *    where:
1630     *       ind = Indirect.File
1631     *       [2] = Indirect.Index
1632     *       .x = Indirect.SwizzleX
1633     */
1634    if (reg->Register.Indirect) {
1635       union tgsi_exec_channel index2;
1636       union tgsi_exec_channel indir_index;
1637       const uint execmask = mach->ExecMask;
1638       uint i;
1639 
1640       /* which address register (always zero now) */
1641       index2.i[0] =
1642       index2.i[1] =
1643       index2.i[2] =
1644       index2.i[3] = reg->Indirect.Index;
1645       /* get current value of address register[swizzle] */
1646       swizzle = reg->Indirect.Swizzle;
1647       fetch_src_file_channel(mach,
1648                              chan_index,
1649                              reg->Indirect.File,
1650                              swizzle,
1651                              &index2,
1652                              &ZeroVec,
1653                              &indir_index);
1654 
1655       /* add value of address register to the offset */
1656       index.i[0] += indir_index.i[0];
1657       index.i[1] += indir_index.i[1];
1658       index.i[2] += indir_index.i[2];
1659       index.i[3] += indir_index.i[3];
1660 
1661       /* for disabled execution channels, zero-out the index to
1662        * avoid using a potential garbage value.
1663        */
1664       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1665          if ((execmask & (1 << i)) == 0)
1666             index.i[i] = 0;
1667       }
1668    }
1669 
1670    /* There is an extra source register that is a second
1671     * subscript to a register file. Effectively it means that
1672     * the register file is actually a 2D array of registers.
1673     *
1674     *    file[3][1],
1675     *    where:
1676     *       [3] = Dimension.Index
1677     */
1678    if (reg->Register.Dimension) {
1679       index2D.i[0] =
1680       index2D.i[1] =
1681       index2D.i[2] =
1682       index2D.i[3] = reg->Dimension.Index;
1683 
1684       /* Again, the second subscript index can be addressed indirectly
1685        * identically to the first one.
1686        * Nothing stops us from indirectly addressing the indirect register,
1687        * but there is no need for that, so we won't exercise it.
1688        *
1689        *    file[ind[4].y+3][1],
1690        *    where:
1691        *       ind = DimIndirect.File
1692        *       [4] = DimIndirect.Index
1693        *       .y = DimIndirect.SwizzleX
1694        */
1695       if (reg->Dimension.Indirect) {
1696          union tgsi_exec_channel index2;
1697          union tgsi_exec_channel indir_index;
1698          const uint execmask = mach->ExecMask;
1699          uint i;
1700 
1701          index2.i[0] =
1702          index2.i[1] =
1703          index2.i[2] =
1704          index2.i[3] = reg->DimIndirect.Index;
1705 
1706          swizzle = reg->DimIndirect.Swizzle;
1707          fetch_src_file_channel(mach,
1708                                 chan_index,
1709                                 reg->DimIndirect.File,
1710                                 swizzle,
1711                                 &index2,
1712                                 &ZeroVec,
1713                                 &indir_index);
1714 
1715          index2D.i[0] += indir_index.i[0];
1716          index2D.i[1] += indir_index.i[1];
1717          index2D.i[2] += indir_index.i[2];
1718          index2D.i[3] += indir_index.i[3];
1719 
1720          /* for disabled execution channels, zero-out the index to
1721           * avoid using a potential garbage value.
1722           */
1723          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1724             if ((execmask & (1 << i)) == 0) {
1725                index2D.i[i] = 0;
1726             }
1727          }
1728       }
1729 
1730       /* If by any chance there was a need for a 3D array of register
1731        * files, we would have to check whether Dimension is followed
1732        * by a dimension register and continue the saga.
1733        */
1734    } else {
1735       index2D.i[0] =
1736       index2D.i[1] =
1737       index2D.i[2] =
1738       index2D.i[3] = 0;
1739    }
1740 
1741    swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1742    fetch_src_file_channel(mach,
1743                           chan_index,
1744                           reg->Register.File,
1745                           swizzle,
1746                           &index,
1747                           &index2D,
1748                           chan);
1749 }
1750 
1751 static void
fetch_source(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index,enum tgsi_exec_datatype src_datatype)1752 fetch_source(const struct tgsi_exec_machine *mach,
1753              union tgsi_exec_channel *chan,
1754              const struct tgsi_full_src_register *reg,
1755              const uint chan_index,
1756              enum tgsi_exec_datatype src_datatype)
1757 {
1758    fetch_source_d(mach, chan, reg, chan_index, src_datatype);
1759 
1760    if (reg->Register.Absolute) {
1761       if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1762          micro_abs(chan, chan);
1763       } else {
1764          micro_iabs(chan, chan);
1765       }
1766    }
1767 
1768    if (reg->Register.Negate) {
1769       if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1770          micro_neg(chan, chan);
1771       } else {
1772          micro_ineg(chan, chan);
1773       }
1774    }
1775 }
1776 
1777 static union tgsi_exec_channel *
store_dest_dstret(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index,enum tgsi_exec_datatype dst_datatype)1778 store_dest_dstret(struct tgsi_exec_machine *mach,
1779                  const union tgsi_exec_channel *chan,
1780                  const struct tgsi_full_dst_register *reg,
1781                  const struct tgsi_full_instruction *inst,
1782                  uint chan_index,
1783                  enum tgsi_exec_datatype dst_datatype)
1784 {
1785    uint i;
1786    static union tgsi_exec_channel null;
1787    union tgsi_exec_channel *dst;
1788    union tgsi_exec_channel index2D;
1789    uint execmask = mach->ExecMask;
1790    int offset = 0;  /* indirection offset */
1791    int index;
1792 
1793    /* for debugging */
1794    if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1795       check_inf_or_nan(chan);
1796    }
1797 
1798    /* There is an extra source register that indirectly subscripts
1799     * a register file. The direct index now becomes an offset
1800     * that is being added to the indirect register.
1801     *
1802     *    file[ind[2].x+1],
1803     *    where:
1804     *       ind = Indirect.File
1805     *       [2] = Indirect.Index
1806     *       .x = Indirect.SwizzleX
1807     */
1808    if (reg->Register.Indirect) {
1809       union tgsi_exec_channel index;
1810       union tgsi_exec_channel indir_index;
1811       uint swizzle;
1812 
1813       /* which address register (always zero for now) */
1814       index.i[0] =
1815       index.i[1] =
1816       index.i[2] =
1817       index.i[3] = reg->Indirect.Index;
1818 
1819       /* get current value of address register[swizzle] */
1820       swizzle = reg->Indirect.Swizzle;
1821 
1822       /* fetch values from the address/indirection register */
1823       fetch_src_file_channel(mach,
1824                              chan_index,
1825                              reg->Indirect.File,
1826                              swizzle,
1827                              &index,
1828                              &ZeroVec,
1829                              &indir_index);
1830 
1831       /* save indirection offset */
1832       offset = indir_index.i[0];
1833    }
1834 
1835    /* There is an extra source register that is a second
1836     * subscript to a register file. Effectively it means that
1837     * the register file is actually a 2D array of registers.
1838     *
1839     *    file[3][1],
1840     *    where:
1841     *       [3] = Dimension.Index
1842     */
1843    if (reg->Register.Dimension) {
1844       index2D.i[0] =
1845       index2D.i[1] =
1846       index2D.i[2] =
1847       index2D.i[3] = reg->Dimension.Index;
1848 
1849       /* Again, the second subscript index can be addressed indirectly
1850        * identically to the first one.
1851        * Nothing stops us from indirectly addressing the indirect register,
1852        * but there is no need for that, so we won't exercise it.
1853        *
1854        *    file[ind[4].y+3][1],
1855        *    where:
1856        *       ind = DimIndirect.File
1857        *       [4] = DimIndirect.Index
1858        *       .y = DimIndirect.SwizzleX
1859        */
1860       if (reg->Dimension.Indirect) {
1861          union tgsi_exec_channel index2;
1862          union tgsi_exec_channel indir_index;
1863          const uint execmask = mach->ExecMask;
1864          unsigned swizzle;
1865          uint i;
1866 
1867          index2.i[0] =
1868          index2.i[1] =
1869          index2.i[2] =
1870          index2.i[3] = reg->DimIndirect.Index;
1871 
1872          swizzle = reg->DimIndirect.Swizzle;
1873          fetch_src_file_channel(mach,
1874                                 chan_index,
1875                                 reg->DimIndirect.File,
1876                                 swizzle,
1877                                 &index2,
1878                                 &ZeroVec,
1879                                 &indir_index);
1880 
1881          index2D.i[0] += indir_index.i[0];
1882          index2D.i[1] += indir_index.i[1];
1883          index2D.i[2] += indir_index.i[2];
1884          index2D.i[3] += indir_index.i[3];
1885 
1886          /* for disabled execution channels, zero-out the index to
1887           * avoid using a potential garbage value.
1888           */
1889          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1890             if ((execmask & (1 << i)) == 0) {
1891                index2D.i[i] = 0;
1892             }
1893          }
1894       }
1895 
1896       /* If by any chance there was a need for a 3D array of register
1897        * files, we would have to check whether Dimension is followed
1898        * by a dimension register and continue the saga.
1899        */
1900    } else {
1901       index2D.i[0] =
1902       index2D.i[1] =
1903       index2D.i[2] =
1904       index2D.i[3] = 0;
1905    }
1906 
1907    switch (reg->Register.File) {
1908    case TGSI_FILE_NULL:
1909       dst = &null;
1910       break;
1911 
1912    case TGSI_FILE_OUTPUT:
1913       index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1914          + reg->Register.Index;
1915       dst = &mach->Outputs[offset + index].xyzw[chan_index];
1916 #if 0
1917       debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1918                    mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1919                    reg->Register.Index);
1920       if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1921          debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1922          for (i = 0; i < TGSI_QUAD_SIZE; i++)
1923             if (execmask & (1 << i))
1924                debug_printf("%f, ", chan->f[i]);
1925          debug_printf(")\n");
1926       }
1927 #endif
1928       break;
1929 
1930    case TGSI_FILE_TEMPORARY:
1931       index = reg->Register.Index;
1932       assert( index < TGSI_EXEC_NUM_TEMPS );
1933       dst = &mach->Temps[offset + index].xyzw[chan_index];
1934       break;
1935 
1936    case TGSI_FILE_ADDRESS:
1937       index = reg->Register.Index;
1938       dst = &mach->Addrs[index].xyzw[chan_index];
1939       break;
1940 
1941    case TGSI_FILE_PREDICATE:
1942       index = reg->Register.Index;
1943       assert(index < TGSI_EXEC_NUM_PREDS);
1944       dst = &mach->Predicates[index].xyzw[chan_index];
1945       break;
1946 
1947    default:
1948       assert( 0 );
1949       return NULL;
1950    }
1951 
1952    if (inst->Instruction.Predicate) {
1953       uint swizzle;
1954       union tgsi_exec_channel *pred;
1955 
1956       switch (chan_index) {
1957       case TGSI_CHAN_X:
1958          swizzle = inst->Predicate.SwizzleX;
1959          break;
1960       case TGSI_CHAN_Y:
1961          swizzle = inst->Predicate.SwizzleY;
1962          break;
1963       case TGSI_CHAN_Z:
1964          swizzle = inst->Predicate.SwizzleZ;
1965          break;
1966       case TGSI_CHAN_W:
1967          swizzle = inst->Predicate.SwizzleW;
1968          break;
1969       default:
1970          assert(0);
1971          return NULL;
1972       }
1973 
1974       assert(inst->Predicate.Index == 0);
1975 
1976       pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1977 
1978       if (inst->Predicate.Negate) {
1979          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1980             if (pred->u[i]) {
1981                execmask &= ~(1 << i);
1982             }
1983          }
1984       } else {
1985          for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1986             if (!pred->u[i]) {
1987                execmask &= ~(1 << i);
1988             }
1989          }
1990       }
1991    }
1992 
1993    return dst;
1994 }
1995 
1996 static void
store_dest_double(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index,enum tgsi_exec_datatype dst_datatype)1997 store_dest_double(struct tgsi_exec_machine *mach,
1998                  const union tgsi_exec_channel *chan,
1999                  const struct tgsi_full_dst_register *reg,
2000                  const struct tgsi_full_instruction *inst,
2001                  uint chan_index,
2002                  enum tgsi_exec_datatype dst_datatype)
2003 {
2004    union tgsi_exec_channel *dst;
2005    const uint execmask = mach->ExecMask;
2006    int i;
2007 
2008    dst = store_dest_dstret(mach, chan, reg, inst, chan_index,
2009 			   dst_datatype);
2010    if (!dst)
2011       return;
2012 
2013    /* doubles path */
2014    for (i = 0; i < TGSI_QUAD_SIZE; i++)
2015       if (execmask & (1 << i))
2016          dst->i[i] = chan->i[i];
2017 }
2018 
2019 static void
store_dest(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index,enum tgsi_exec_datatype dst_datatype)2020 store_dest(struct tgsi_exec_machine *mach,
2021            const union tgsi_exec_channel *chan,
2022            const struct tgsi_full_dst_register *reg,
2023            const struct tgsi_full_instruction *inst,
2024            uint chan_index,
2025            enum tgsi_exec_datatype dst_datatype)
2026 {
2027    union tgsi_exec_channel *dst;
2028    const uint execmask = mach->ExecMask;
2029    int i;
2030 
2031    dst = store_dest_dstret(mach, chan, reg, inst, chan_index,
2032                     dst_datatype);
2033    if (!dst)
2034       return;
2035 
2036    if (!inst->Instruction.Saturate) {
2037       for (i = 0; i < TGSI_QUAD_SIZE; i++)
2038          if (execmask & (1 << i))
2039             dst->i[i] = chan->i[i];
2040    }
2041    else {
2042       for (i = 0; i < TGSI_QUAD_SIZE; i++)
2043          if (execmask & (1 << i)) {
2044             if (chan->f[i] < 0.0f)
2045                dst->f[i] = 0.0f;
2046             else if (chan->f[i] > 1.0f)
2047                dst->f[i] = 1.0f;
2048             else
2049                dst->i[i] = chan->i[i];
2050          }
2051    }
2052 }
2053 
/*
 * Convenience wrappers around fetch_source() for instruction handlers.
 * Both expect variables named `mach` and `inst` to be in scope and read
 * channel CHAN of source operand INDEX into VAL.
 */

/* Source modifiers are applied with float arithmetic. */
#define FETCH(VAL, INDEX, CHAN) \
    fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_FLOAT)

/* Source modifiers are applied with integer arithmetic. */
#define IFETCH(VAL, INDEX, CHAN) \
    fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_INT)
2059 
2060 
2061 /**
2062  * Execute ARB-style KIL which is predicated by a src register.
2063  * Kill fragment if any of the four values is less than zero.
2064  */
2065 static void
exec_kill_if(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2066 exec_kill_if(struct tgsi_exec_machine *mach,
2067              const struct tgsi_full_instruction *inst)
2068 {
2069    uint uniquemask;
2070    uint chan_index;
2071    uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2072    union tgsi_exec_channel r[1];
2073 
2074    /* This mask stores component bits that were already tested. */
2075    uniquemask = 0;
2076 
2077    for (chan_index = 0; chan_index < 4; chan_index++)
2078    {
2079       uint swizzle;
2080       uint i;
2081 
2082       /* unswizzle channel */
2083       swizzle = tgsi_util_get_full_src_register_swizzle (
2084                         &inst->Src[0],
2085                         chan_index);
2086 
2087       /* check if the component has not been already tested */
2088       if (uniquemask & (1 << swizzle))
2089          continue;
2090       uniquemask |= 1 << swizzle;
2091 
2092       FETCH(&r[0], 0, chan_index);
2093       for (i = 0; i < 4; i++)
2094          if (r[0].f[i] < 0.0f)
2095             kilmask |= 1 << i;
2096    }
2097 
2098    /* restrict to fragments currently executing */
2099    kilmask &= mach->ExecMask;
2100 
2101    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2102 }
2103 
2104 /**
2105  * Unconditional fragment kill/discard.
2106  */
2107 static void
exec_kill(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2108 exec_kill(struct tgsi_exec_machine *mach,
2109           const struct tgsi_full_instruction *inst)
2110 {
2111    uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2112 
2113    /* kill fragment for all fragments currently executing */
2114    kilmask = mach->ExecMask;
2115    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2116 }
2117 
2118 static void
emit_vertex(struct tgsi_exec_machine * mach)2119 emit_vertex(struct tgsi_exec_machine *mach)
2120 {
2121    /* FIXME: check for exec mask correctly
2122    unsigned i;
2123    for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2124          if ((mach->ExecMask & (1 << i)))
2125    */
2126    if (mach->ExecMask) {
2127       if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices)
2128          return;
2129 
2130       mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
2131       mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2132    }
2133 }
2134 
2135 static void
emit_primitive(struct tgsi_exec_machine * mach)2136 emit_primitive(struct tgsi_exec_machine *mach)
2137 {
2138    unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
2139    /* FIXME: check for exec mask correctly
2140    unsigned i;
2141    for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2142          if ((mach->ExecMask & (1 << i)))
2143    */
2144    if (mach->ExecMask) {
2145       ++(*prim_count);
2146       debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
2147       mach->Primitives[*prim_count] = 0;
2148    }
2149 }
2150 
2151 static void
conditional_emit_primitive(struct tgsi_exec_machine * mach)2152 conditional_emit_primitive(struct tgsi_exec_machine *mach)
2153 {
2154    if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
2155       int emitted_verts =
2156          mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
2157       if (emitted_verts) {
2158          emit_primitive(mach);
2159       }
2160    }
2161 }
2162 
2163 
2164 /*
2165  * Fetch four texture samples using STR texture coordinates.
2166  */
2167 static void
fetch_texel(struct tgsi_sampler * sampler,const unsigned sview_idx,const unsigned sampler_idx,const union tgsi_exec_channel * s,const union tgsi_exec_channel * t,const union tgsi_exec_channel * p,const union tgsi_exec_channel * c0,const union tgsi_exec_channel * c1,float derivs[3][2][TGSI_QUAD_SIZE],const int8_t offset[3],enum tgsi_sampler_control control,union tgsi_exec_channel * r,union tgsi_exec_channel * g,union tgsi_exec_channel * b,union tgsi_exec_channel * a)2168 fetch_texel( struct tgsi_sampler *sampler,
2169              const unsigned sview_idx,
2170              const unsigned sampler_idx,
2171              const union tgsi_exec_channel *s,
2172              const union tgsi_exec_channel *t,
2173              const union tgsi_exec_channel *p,
2174              const union tgsi_exec_channel *c0,
2175              const union tgsi_exec_channel *c1,
2176              float derivs[3][2][TGSI_QUAD_SIZE],
2177              const int8_t offset[3],
2178              enum tgsi_sampler_control control,
2179              union tgsi_exec_channel *r,
2180              union tgsi_exec_channel *g,
2181              union tgsi_exec_channel *b,
2182              union tgsi_exec_channel *a )
2183 {
2184    uint j;
2185    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2186 
2187    /* FIXME: handle explicit derivs, offsets */
2188    sampler->get_samples(sampler, sview_idx, sampler_idx,
2189                         s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
2190 
2191    for (j = 0; j < 4; j++) {
2192       r->f[j] = rgba[0][j];
2193       g->f[j] = rgba[1][j];
2194       b->f[j] = rgba[2][j];
2195       a->f[j] = rgba[3][j];
2196    }
2197 }
2198 
2199 
/*
 * Values for the `modifier` argument of exec_tex().  Any value other than
 * NONE makes exec_tex() fetch one extra "modifier" value from src0.w or
 * src1.x.
 */
/* no extra value is fetched; sampler control stays TGSI_SAMPLER_LOD_NONE */
#define TEX_MODIFIER_NONE           0
/* coordinates (and shadow reference) are divided by the extra value */
#define TEX_MODIFIER_PROJECTED      1
/* extra value is passed on with TGSI_SAMPLER_LOD_BIAS */
#define TEX_MODIFIER_LOD_BIAS       2
/* extra value is passed on with TGSI_SAMPLER_LOD_EXPLICIT */
#define TEX_MODIFIER_EXPLICIT_LOD   3
/* exec_tex() asserts that it is never called with this value */
#define TEX_MODIFIER_LEVEL_ZERO     4
/* extra value is passed on with TGSI_SAMPLER_GATHER */
#define TEX_MODIFIER_GATHER         5
2206 
2207 /*
2208  * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2209  */
2210 static void
fetch_texel_offsets(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int8_t offsets[3])2211 fetch_texel_offsets(struct tgsi_exec_machine *mach,
2212                     const struct tgsi_full_instruction *inst,
2213                     int8_t offsets[3])
2214 {
2215    if (inst->Texture.NumOffsets == 1) {
2216       union tgsi_exec_channel index;
2217       union tgsi_exec_channel offset[3];
2218       index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2219       fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2220                              inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2221       fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2222                              inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2223       fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2224                              inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2225      offsets[0] = offset[0].i[0];
2226      offsets[1] = offset[1].i[0];
2227      offsets[2] = offset[2].i[0];
2228    } else {
2229      assert(inst->Texture.NumOffsets == 0);
2230      offsets[0] = offsets[1] = offsets[2] = 0;
2231    }
2232 }
2233 
2234 
2235 /*
2236  * Fetch dx and dy values for one channel (s, t or r).
2237  * Put dx values into one float array, dy values into another.
2238  */
2239 static void
fetch_assign_deriv_channel(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,unsigned regdsrcx,unsigned chan,float derivs[2][TGSI_QUAD_SIZE])2240 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2241                            const struct tgsi_full_instruction *inst,
2242                            unsigned regdsrcx,
2243                            unsigned chan,
2244                            float derivs[2][TGSI_QUAD_SIZE])
2245 {
2246    union tgsi_exec_channel d;
2247    FETCH(&d, regdsrcx, chan);
2248    derivs[0][0] = d.f[0];
2249    derivs[0][1] = d.f[1];
2250    derivs[0][2] = d.f[2];
2251    derivs[0][3] = d.f[3];
2252    FETCH(&d, regdsrcx + 1, chan);
2253    derivs[1][0] = d.f[0];
2254    derivs[1][1] = d.f[1];
2255    derivs[1][2] = d.f[2];
2256    derivs[1][3] = d.f[3];
2257 }
2258 
2259 static uint
fetch_sampler_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint sampler)2260 fetch_sampler_unit(struct tgsi_exec_machine *mach,
2261                    const struct tgsi_full_instruction *inst,
2262                    uint sampler)
2263 {
2264    uint unit = 0;
2265    int i;
2266    if (inst->Src[sampler].Register.Indirect) {
2267       const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2268       union tgsi_exec_channel indir_index, index2;
2269       const uint execmask = mach->ExecMask;
2270       index2.i[0] =
2271       index2.i[1] =
2272       index2.i[2] =
2273       index2.i[3] = reg->Indirect.Index;
2274 
2275       fetch_src_file_channel(mach,
2276                              0,
2277                              reg->Indirect.File,
2278                              reg->Indirect.Swizzle,
2279                              &index2,
2280                              &ZeroVec,
2281                              &indir_index);
2282       for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2283          if (execmask & (1 << i)) {
2284             unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2285             break;
2286          }
2287       }
2288 
2289    } else {
2290       unit = inst->Src[sampler].Register.Index;
2291    }
2292    return unit;
2293 }
2294 
2295 /*
2296  * execute a texture instruction.
2297  *
2298  * modifier is used to control the channel routing for the
2299  * instruction variants like proj, lod, and texture with lod bias.
2300  * sampler indicates which src register the sampler is contained in.
2301  */
2302 static void
exec_tex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,uint sampler)2303 exec_tex(struct tgsi_exec_machine *mach,
2304          const struct tgsi_full_instruction *inst,
2305          uint modifier, uint sampler)
2306 {
2307    const union tgsi_exec_channel *args[5], *proj = NULL;
2308    union tgsi_exec_channel r[5];
2309    enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2310    uint chan;
2311    uint unit;
2312    int8_t offsets[3];
2313    int dim, shadow_ref, i;
2314 
2315    unit = fetch_sampler_unit(mach, inst, sampler);
2316    /* always fetch all 3 offsets, overkill but keeps code simple */
2317    fetch_texel_offsets(mach, inst, offsets);
2318 
2319    assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2320    assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2321 
2322    dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2323    shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2324 
2325    assert(dim <= 4);
2326    if (shadow_ref >= 0)
2327       assert(shadow_ref >= dim && shadow_ref < ARRAY_SIZE(args));
2328 
2329    /* fetch modifier to the last argument */
2330    if (modifier != TEX_MODIFIER_NONE) {
2331       const int last = ARRAY_SIZE(args) - 1;
2332 
2333       /* fetch modifier from src0.w or src1.x */
2334       if (sampler == 1) {
2335          assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2336          FETCH(&r[last], 0, TGSI_CHAN_W);
2337       }
2338       else {
2339          assert(shadow_ref != 4);
2340          FETCH(&r[last], 1, TGSI_CHAN_X);
2341       }
2342 
2343       if (modifier != TEX_MODIFIER_PROJECTED) {
2344          args[last] = &r[last];
2345       }
2346       else {
2347          proj = &r[last];
2348          args[last] = &ZeroVec;
2349       }
2350 
2351       /* point unused arguments to zero vector */
2352       for (i = dim; i < last; i++)
2353          args[i] = &ZeroVec;
2354 
2355       if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2356          control = TGSI_SAMPLER_LOD_EXPLICIT;
2357       else if (modifier == TEX_MODIFIER_LOD_BIAS)
2358          control = TGSI_SAMPLER_LOD_BIAS;
2359       else if (modifier == TEX_MODIFIER_GATHER)
2360          control = TGSI_SAMPLER_GATHER;
2361    }
2362    else {
2363       for (i = dim; i < ARRAY_SIZE(args); i++)
2364          args[i] = &ZeroVec;
2365    }
2366 
2367    /* fetch coordinates */
2368    for (i = 0; i < dim; i++) {
2369       FETCH(&r[i], 0, TGSI_CHAN_X + i);
2370 
2371       if (proj)
2372          micro_div(&r[i], &r[i], proj);
2373 
2374       args[i] = &r[i];
2375    }
2376 
2377    /* fetch reference value */
2378    if (shadow_ref >= 0) {
2379       FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2380 
2381       if (proj)
2382          micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2383 
2384       args[shadow_ref] = &r[shadow_ref];
2385    }
2386 
2387    fetch_texel(mach->Sampler, unit, unit,
2388          args[0], args[1], args[2], args[3], args[4],
2389          NULL, offsets, control,
2390          &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2391 
2392 #if 0
2393    debug_printf("fetch r: %g %g %g %g\n",
2394          r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2395    debug_printf("fetch g: %g %g %g %g\n",
2396          r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2397    debug_printf("fetch b: %g %g %g %g\n",
2398          r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2399    debug_printf("fetch a: %g %g %g %g\n",
2400          r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2401 #endif
2402 
2403    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2404       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2405          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2406       }
2407    }
2408 }
2409 
2410 static void
exec_lodq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2411 exec_lodq(struct tgsi_exec_machine *mach,
2412           const struct tgsi_full_instruction *inst)
2413 {
2414    uint unit;
2415    int dim;
2416    int i;
2417    union tgsi_exec_channel coords[4];
2418    const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2419    union tgsi_exec_channel r[2];
2420 
2421    unit = fetch_sampler_unit(mach, inst, 1);
2422    dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2423    assert(dim <= ARRAY_SIZE(coords));
2424    /* fetch coordinates */
2425    for (i = 0; i < dim; i++) {
2426       FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2427       args[i] = &coords[i];
2428    }
2429    for (i = dim; i < ARRAY_SIZE(coords); i++) {
2430       args[i] = &ZeroVec;
2431    }
2432    mach->Sampler->query_lod(mach->Sampler, unit, unit,
2433                             args[0]->f,
2434                             args[1]->f,
2435                             args[2]->f,
2436                             args[3]->f,
2437                             TGSI_SAMPLER_LOD_NONE,
2438                             r[0].f,
2439                             r[1].f);
2440 
2441    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2442       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2443                  TGSI_EXEC_DATA_FLOAT);
2444    }
2445    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2446       store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2447                  TGSI_EXEC_DATA_FLOAT);
2448    }
2449 }
2450 
2451 static void
exec_txd(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2452 exec_txd(struct tgsi_exec_machine *mach,
2453          const struct tgsi_full_instruction *inst)
2454 {
2455    union tgsi_exec_channel r[4];
2456    float derivs[3][2][TGSI_QUAD_SIZE];
2457    uint chan;
2458    uint unit;
2459    int8_t offsets[3];
2460 
2461    unit = fetch_sampler_unit(mach, inst, 3);
2462    /* always fetch all 3 offsets, overkill but keeps code simple */
2463    fetch_texel_offsets(mach, inst, offsets);
2464 
2465    switch (inst->Texture.Texture) {
2466    case TGSI_TEXTURE_1D:
2467       FETCH(&r[0], 0, TGSI_CHAN_X);
2468 
2469       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2470 
2471       fetch_texel(mach->Sampler, unit, unit,
2472                   &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2473                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2474                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2475       break;
2476 
2477    case TGSI_TEXTURE_SHADOW1D:
2478    case TGSI_TEXTURE_1D_ARRAY:
2479    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2480       /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2481       FETCH(&r[0], 0, TGSI_CHAN_X);
2482       FETCH(&r[1], 0, TGSI_CHAN_Y);
2483       FETCH(&r[2], 0, TGSI_CHAN_Z);
2484 
2485       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2486 
2487       fetch_texel(mach->Sampler, unit, unit,
2488                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2489                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2490                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2491       break;
2492 
2493    case TGSI_TEXTURE_2D:
2494    case TGSI_TEXTURE_RECT:
2495       FETCH(&r[0], 0, TGSI_CHAN_X);
2496       FETCH(&r[1], 0, TGSI_CHAN_Y);
2497 
2498       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2499       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2500 
2501       fetch_texel(mach->Sampler, unit, unit,
2502                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2503                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2504                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2505       break;
2506 
2507 
2508    case TGSI_TEXTURE_SHADOW2D:
2509    case TGSI_TEXTURE_SHADOWRECT:
2510    case TGSI_TEXTURE_2D_ARRAY:
2511    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2512       /* only SHADOW2D_ARRAY actually needs W */
2513       FETCH(&r[0], 0, TGSI_CHAN_X);
2514       FETCH(&r[1], 0, TGSI_CHAN_Y);
2515       FETCH(&r[2], 0, TGSI_CHAN_Z);
2516       FETCH(&r[3], 0, TGSI_CHAN_W);
2517 
2518       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2519       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2520 
2521       fetch_texel(mach->Sampler, unit, unit,
2522                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
2523                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2524                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2525       break;
2526 
2527    case TGSI_TEXTURE_3D:
2528    case TGSI_TEXTURE_CUBE:
2529    case TGSI_TEXTURE_CUBE_ARRAY:
2530    case TGSI_TEXTURE_SHADOWCUBE:
2531       /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2532       FETCH(&r[0], 0, TGSI_CHAN_X);
2533       FETCH(&r[1], 0, TGSI_CHAN_Y);
2534       FETCH(&r[2], 0, TGSI_CHAN_Z);
2535       FETCH(&r[3], 0, TGSI_CHAN_W);
2536 
2537       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2538       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2539       fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2540 
2541       fetch_texel(mach->Sampler, unit, unit,
2542                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
2543                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2544                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2545       break;
2546 
2547    default:
2548       assert(0);
2549    }
2550 
2551    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2552       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2553          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2554       }
2555    }
2556 }
2557 
2558 
2559 static void
exec_txf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2560 exec_txf(struct tgsi_exec_machine *mach,
2561          const struct tgsi_full_instruction *inst)
2562 {
2563    union tgsi_exec_channel r[4];
2564    uint chan;
2565    uint unit;
2566    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2567    int j;
2568    int8_t offsets[3];
2569    unsigned target;
2570 
2571    unit = fetch_sampler_unit(mach, inst, 1);
2572    /* always fetch all 3 offsets, overkill but keeps code simple */
2573    fetch_texel_offsets(mach, inst, offsets);
2574 
2575    IFETCH(&r[3], 0, TGSI_CHAN_W);
2576 
2577    if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2578        inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2579       target = mach->SamplerViews[unit].Resource;
2580    }
2581    else {
2582       target = inst->Texture.Texture;
2583    }
2584    switch(target) {
2585    case TGSI_TEXTURE_3D:
2586    case TGSI_TEXTURE_2D_ARRAY:
2587    case TGSI_TEXTURE_SHADOW2D_ARRAY:
2588    case TGSI_TEXTURE_2D_ARRAY_MSAA:
2589       IFETCH(&r[2], 0, TGSI_CHAN_Z);
2590       /* fallthrough */
2591    case TGSI_TEXTURE_2D:
2592    case TGSI_TEXTURE_RECT:
2593    case TGSI_TEXTURE_SHADOW1D_ARRAY:
2594    case TGSI_TEXTURE_SHADOW2D:
2595    case TGSI_TEXTURE_SHADOWRECT:
2596    case TGSI_TEXTURE_1D_ARRAY:
2597    case TGSI_TEXTURE_2D_MSAA:
2598       IFETCH(&r[1], 0, TGSI_CHAN_Y);
2599       /* fallthrough */
2600    case TGSI_TEXTURE_BUFFER:
2601    case TGSI_TEXTURE_1D:
2602    case TGSI_TEXTURE_SHADOW1D:
2603       IFETCH(&r[0], 0, TGSI_CHAN_X);
2604       break;
2605    default:
2606       assert(0);
2607       break;
2608    }
2609 
2610    mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2611                             offsets, rgba);
2612 
2613    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2614       r[0].f[j] = rgba[0][j];
2615       r[1].f[j] = rgba[1][j];
2616       r[2].f[j] = rgba[2][j];
2617       r[3].f[j] = rgba[3][j];
2618    }
2619 
2620    if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2621        inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2622       unsigned char swizzles[4];
2623       swizzles[0] = inst->Src[1].Register.SwizzleX;
2624       swizzles[1] = inst->Src[1].Register.SwizzleY;
2625       swizzles[2] = inst->Src[1].Register.SwizzleZ;
2626       swizzles[3] = inst->Src[1].Register.SwizzleW;
2627 
2628       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2629          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2630             store_dest(mach, &r[swizzles[chan]],
2631                        &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2632          }
2633       }
2634    }
2635    else {
2636       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2637          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2638             store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2639          }
2640       }
2641    }
2642 }
2643 
2644 static void
exec_txq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2645 exec_txq(struct tgsi_exec_machine *mach,
2646          const struct tgsi_full_instruction *inst)
2647 {
2648    int result[4];
2649    union tgsi_exec_channel r[4], src;
2650    uint chan;
2651    uint unit;
2652    int i,j;
2653 
2654    unit = fetch_sampler_unit(mach, inst, 1);
2655 
2656    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2657 
2658    /* XXX: This interface can't return per-pixel values */
2659    mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2660 
2661    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2662       for (j = 0; j < 4; j++) {
2663          r[j].i[i] = result[j];
2664       }
2665    }
2666 
2667    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2668       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2669          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2670                     TGSI_EXEC_DATA_INT);
2671       }
2672    }
2673 }
2674 
2675 static void
exec_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,boolean compare)2676 exec_sample(struct tgsi_exec_machine *mach,
2677             const struct tgsi_full_instruction *inst,
2678             uint modifier, boolean compare)
2679 {
2680    const uint resource_unit = inst->Src[1].Register.Index;
2681    const uint sampler_unit = inst->Src[2].Register.Index;
2682    union tgsi_exec_channel r[5], c1;
2683    const union tgsi_exec_channel *lod = &ZeroVec;
2684    enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2685    uint chan;
2686    unsigned char swizzles[4];
2687    int8_t offsets[3];
2688 
2689    /* always fetch all 3 offsets, overkill but keeps code simple */
2690    fetch_texel_offsets(mach, inst, offsets);
2691 
2692    assert(modifier != TEX_MODIFIER_PROJECTED);
2693 
2694    if (modifier != TEX_MODIFIER_NONE) {
2695       if (modifier == TEX_MODIFIER_LOD_BIAS) {
2696          FETCH(&c1, 3, TGSI_CHAN_X);
2697          lod = &c1;
2698          control = TGSI_SAMPLER_LOD_BIAS;
2699       }
2700       else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2701          FETCH(&c1, 3, TGSI_CHAN_X);
2702          lod = &c1;
2703          control = TGSI_SAMPLER_LOD_EXPLICIT;
2704       }
2705       else {
2706          assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2707          control = TGSI_SAMPLER_LOD_ZERO;
2708       }
2709    }
2710 
2711    FETCH(&r[0], 0, TGSI_CHAN_X);
2712 
2713    switch (mach->SamplerViews[resource_unit].Resource) {
2714    case TGSI_TEXTURE_1D:
2715       if (compare) {
2716          FETCH(&r[2], 3, TGSI_CHAN_X);
2717          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2718                      &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2719                      NULL, offsets, control,
2720                      &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2721       }
2722       else {
2723          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2724                      &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2725                      NULL, offsets, control,
2726                      &r[0], &r[1], &r[2], &r[3]);     /* R, G, B, A */
2727       }
2728       break;
2729 
2730    case TGSI_TEXTURE_1D_ARRAY:
2731    case TGSI_TEXTURE_2D:
2732    case TGSI_TEXTURE_RECT:
2733       FETCH(&r[1], 0, TGSI_CHAN_Y);
2734       if (compare) {
2735          FETCH(&r[2], 3, TGSI_CHAN_X);
2736          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2737                      &r[0], &r[1], &r[2], &ZeroVec, lod,    /* S, T, P, C, LOD */
2738                      NULL, offsets, control,
2739                      &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2740       }
2741       else {
2742          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2743                      &r[0], &r[1], &ZeroVec, &ZeroVec, lod,    /* S, T, P, C, LOD */
2744                      NULL, offsets, control,
2745                      &r[0], &r[1], &r[2], &r[3]);  /* outputs */
2746       }
2747       break;
2748 
2749    case TGSI_TEXTURE_2D_ARRAY:
2750    case TGSI_TEXTURE_3D:
2751    case TGSI_TEXTURE_CUBE:
2752       FETCH(&r[1], 0, TGSI_CHAN_Y);
2753       FETCH(&r[2], 0, TGSI_CHAN_Z);
2754       if(compare) {
2755          FETCH(&r[3], 3, TGSI_CHAN_X);
2756          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2757                      &r[0], &r[1], &r[2], &r[3], lod,
2758                      NULL, offsets, control,
2759                      &r[0], &r[1], &r[2], &r[3]);
2760       }
2761       else {
2762          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2763                      &r[0], &r[1], &r[2], &ZeroVec, lod,
2764                      NULL, offsets, control,
2765                      &r[0], &r[1], &r[2], &r[3]);
2766       }
2767       break;
2768 
2769    case TGSI_TEXTURE_CUBE_ARRAY:
2770       FETCH(&r[1], 0, TGSI_CHAN_Y);
2771       FETCH(&r[2], 0, TGSI_CHAN_Z);
2772       FETCH(&r[3], 0, TGSI_CHAN_W);
2773       if(compare) {
2774          FETCH(&r[4], 3, TGSI_CHAN_X);
2775          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2776                      &r[0], &r[1], &r[2], &r[3], &r[4],
2777                      NULL, offsets, control,
2778                      &r[0], &r[1], &r[2], &r[3]);
2779       }
2780       else {
2781          fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2782                      &r[0], &r[1], &r[2], &r[3], lod,
2783                      NULL, offsets, control,
2784                      &r[0], &r[1], &r[2], &r[3]);
2785       }
2786       break;
2787 
2788 
2789    default:
2790       assert(0);
2791    }
2792 
2793    swizzles[0] = inst->Src[1].Register.SwizzleX;
2794    swizzles[1] = inst->Src[1].Register.SwizzleY;
2795    swizzles[2] = inst->Src[1].Register.SwizzleZ;
2796    swizzles[3] = inst->Src[1].Register.SwizzleW;
2797 
2798    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2799       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2800          store_dest(mach, &r[swizzles[chan]],
2801                     &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2802       }
2803    }
2804 }
2805 
2806 static void
exec_sample_d(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2807 exec_sample_d(struct tgsi_exec_machine *mach,
2808               const struct tgsi_full_instruction *inst)
2809 {
2810    const uint resource_unit = inst->Src[1].Register.Index;
2811    const uint sampler_unit = inst->Src[2].Register.Index;
2812    union tgsi_exec_channel r[4];
2813    float derivs[3][2][TGSI_QUAD_SIZE];
2814    uint chan;
2815    unsigned char swizzles[4];
2816    int8_t offsets[3];
2817 
2818    /* always fetch all 3 offsets, overkill but keeps code simple */
2819    fetch_texel_offsets(mach, inst, offsets);
2820 
2821    FETCH(&r[0], 0, TGSI_CHAN_X);
2822 
2823    switch (mach->SamplerViews[resource_unit].Resource) {
2824    case TGSI_TEXTURE_1D:
2825    case TGSI_TEXTURE_1D_ARRAY:
2826       /* only 1D array actually needs Y */
2827       FETCH(&r[1], 0, TGSI_CHAN_Y);
2828 
2829       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2830 
2831       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2832                   &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
2833                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2834                   &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
2835       break;
2836 
2837    case TGSI_TEXTURE_2D:
2838    case TGSI_TEXTURE_RECT:
2839    case TGSI_TEXTURE_2D_ARRAY:
2840       /* only 2D array actually needs Z */
2841       FETCH(&r[1], 0, TGSI_CHAN_Y);
2842       FETCH(&r[2], 0, TGSI_CHAN_Z);
2843 
2844       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2845       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2846 
2847       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2848                   &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
2849                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2850                   &r[0], &r[1], &r[2], &r[3]);     /* outputs */
2851       break;
2852 
2853    case TGSI_TEXTURE_3D:
2854    case TGSI_TEXTURE_CUBE:
2855    case TGSI_TEXTURE_CUBE_ARRAY:
2856       /* only cube array actually needs W */
2857       FETCH(&r[1], 0, TGSI_CHAN_Y);
2858       FETCH(&r[2], 0, TGSI_CHAN_Z);
2859       FETCH(&r[3], 0, TGSI_CHAN_W);
2860 
2861       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2862       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2863       fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2864 
2865       fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2866                   &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2867                   derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2868                   &r[0], &r[1], &r[2], &r[3]);
2869       break;
2870 
2871    default:
2872       assert(0);
2873    }
2874 
2875    swizzles[0] = inst->Src[1].Register.SwizzleX;
2876    swizzles[1] = inst->Src[1].Register.SwizzleY;
2877    swizzles[2] = inst->Src[1].Register.SwizzleZ;
2878    swizzles[3] = inst->Src[1].Register.SwizzleW;
2879 
2880    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2881       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2882          store_dest(mach, &r[swizzles[chan]],
2883                     &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2884       }
2885    }
2886 }
2887 
2888 
2889 /**
2890  * Evaluate a constant-valued coefficient at the position of the
2891  * current quad.
2892  */
2893 static void
eval_constant_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2894 eval_constant_coef(
2895    struct tgsi_exec_machine *mach,
2896    unsigned attrib,
2897    unsigned chan )
2898 {
2899    unsigned i;
2900 
2901    for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2902       mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2903    }
2904 }
2905 
2906 /**
2907  * Evaluate a linear-valued coefficient at the position of the
2908  * current quad.
2909  */
2910 static void
eval_linear_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2911 eval_linear_coef(
2912    struct tgsi_exec_machine *mach,
2913    unsigned attrib,
2914    unsigned chan )
2915 {
2916    const float x = mach->QuadPos.xyzw[0].f[0];
2917    const float y = mach->QuadPos.xyzw[1].f[0];
2918    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2919    const float dady = mach->InterpCoefs[attrib].dady[chan];
2920    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2921    mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2922    mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2923    mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2924    mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2925 }
2926 
2927 /**
2928  * Evaluate a perspective-valued coefficient at the position of the
2929  * current quad.
2930  */
2931 static void
eval_perspective_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2932 eval_perspective_coef(
2933    struct tgsi_exec_machine *mach,
2934    unsigned attrib,
2935    unsigned chan )
2936 {
2937    const float x = mach->QuadPos.xyzw[0].f[0];
2938    const float y = mach->QuadPos.xyzw[1].f[0];
2939    const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2940    const float dady = mach->InterpCoefs[attrib].dady[chan];
2941    const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2942    const float *w = mach->QuadPos.xyzw[3].f;
2943    /* divide by W here */
2944    mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2945    mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2946    mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2947    mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2948 }
2949 
2950 
/**
 * Signature shared by the coefficient evaluators (eval_constant_coef,
 * eval_linear_coef, eval_perspective_coef).
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2955 
2956 static void
exec_declaration(struct tgsi_exec_machine * mach,const struct tgsi_full_declaration * decl)2957 exec_declaration(struct tgsi_exec_machine *mach,
2958                  const struct tgsi_full_declaration *decl)
2959 {
2960    if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2961       mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2962       return;
2963    }
2964 
2965    if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2966       if (decl->Declaration.File == TGSI_FILE_INPUT) {
2967          uint first, last, mask;
2968 
2969          first = decl->Range.First;
2970          last = decl->Range.Last;
2971          mask = decl->Declaration.UsageMask;
2972 
2973          /* XXX we could remove this special-case code since
2974           * mach->InterpCoefs[first].a0 should already have the
2975           * front/back-face value.  But we should first update the
2976           * ureg code to emit the right UsageMask value (WRITEMASK_X).
2977           * Then, we could remove the tgsi_exec_machine::Face field.
2978           */
2979          /* XXX make FACE a system value */
2980          if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2981             uint i;
2982 
2983             assert(decl->Semantic.Index == 0);
2984             assert(first == last);
2985 
2986             for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2987                mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2988             }
2989          } else {
2990             eval_coef_func eval;
2991             uint i, j;
2992 
2993             switch (decl->Interp.Interpolate) {
2994             case TGSI_INTERPOLATE_CONSTANT:
2995                eval = eval_constant_coef;
2996                break;
2997 
2998             case TGSI_INTERPOLATE_LINEAR:
2999                eval = eval_linear_coef;
3000                break;
3001 
3002             case TGSI_INTERPOLATE_PERSPECTIVE:
3003                eval = eval_perspective_coef;
3004                break;
3005 
3006             case TGSI_INTERPOLATE_COLOR:
3007                eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
3008                break;
3009 
3010             default:
3011                assert(0);
3012                return;
3013             }
3014 
3015             for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3016                if (mask & (1 << j)) {
3017                   for (i = first; i <= last; i++) {
3018                      eval(mach, i, j);
3019                   }
3020                }
3021             }
3022          }
3023 
3024          if (DEBUG_EXECUTION) {
3025             uint i, j;
3026             for (i = first; i <= last; ++i) {
3027                debug_printf("IN[%2u] = ", i);
3028                for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3029                   if (j > 0) {
3030                      debug_printf("         ");
3031                   }
3032                   debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3033                                mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
3034                                mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
3035                                mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
3036                                mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
3037                }
3038             }
3039          }
3040       }
3041    }
3042 
3043 }
3044 
/** Per-channel operation with one source operand; the result goes to dst. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
3047 
3048 static void
exec_scalar_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3049 exec_scalar_unary(struct tgsi_exec_machine *mach,
3050                   const struct tgsi_full_instruction *inst,
3051                   micro_unary_op op,
3052                   enum tgsi_exec_datatype dst_datatype,
3053                   enum tgsi_exec_datatype src_datatype)
3054 {
3055    unsigned int chan;
3056    union tgsi_exec_channel src;
3057    union tgsi_exec_channel dst;
3058 
3059    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
3060    op(&dst, &src);
3061    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3062       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3063          store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3064       }
3065    }
3066 }
3067 
3068 static void
exec_vector_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3069 exec_vector_unary(struct tgsi_exec_machine *mach,
3070                   const struct tgsi_full_instruction *inst,
3071                   micro_unary_op op,
3072                   enum tgsi_exec_datatype dst_datatype,
3073                   enum tgsi_exec_datatype src_datatype)
3074 {
3075    unsigned int chan;
3076    struct tgsi_exec_vector dst;
3077 
3078    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3079       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3080          union tgsi_exec_channel src;
3081 
3082          fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
3083          op(&dst.xyzw[chan], &src);
3084       }
3085    }
3086    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3087       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3088          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3089       }
3090    }
3091 }
3092 
/** Per-channel operation with two source operands; the result goes to dst. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
3096 
3097 static void
exec_scalar_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3098 exec_scalar_binary(struct tgsi_exec_machine *mach,
3099                    const struct tgsi_full_instruction *inst,
3100                    micro_binary_op op,
3101                    enum tgsi_exec_datatype dst_datatype,
3102                    enum tgsi_exec_datatype src_datatype)
3103 {
3104    unsigned int chan;
3105    union tgsi_exec_channel src[2];
3106    union tgsi_exec_channel dst;
3107 
3108    fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
3109    fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
3110    op(&dst, &src[0], &src[1]);
3111    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3112       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3113          store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3114       }
3115    }
3116 }
3117 
3118 static void
exec_vector_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3119 exec_vector_binary(struct tgsi_exec_machine *mach,
3120                    const struct tgsi_full_instruction *inst,
3121                    micro_binary_op op,
3122                    enum tgsi_exec_datatype dst_datatype,
3123                    enum tgsi_exec_datatype src_datatype)
3124 {
3125    unsigned int chan;
3126    struct tgsi_exec_vector dst;
3127 
3128    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3129       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3130          union tgsi_exec_channel src[2];
3131 
3132          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3133          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3134          op(&dst.xyzw[chan], &src[0], &src[1]);
3135       }
3136    }
3137    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3138       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3139          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3140       }
3141    }
3142 }
3143 
/** Per-channel operation with three source operands; the result goes to dst. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
3148 
3149 static void
exec_vector_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_trinary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3150 exec_vector_trinary(struct tgsi_exec_machine *mach,
3151                     const struct tgsi_full_instruction *inst,
3152                     micro_trinary_op op,
3153                     enum tgsi_exec_datatype dst_datatype,
3154                     enum tgsi_exec_datatype src_datatype)
3155 {
3156    unsigned int chan;
3157    struct tgsi_exec_vector dst;
3158 
3159    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3160       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3161          union tgsi_exec_channel src[3];
3162 
3163          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3164          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3165          fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3166          op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3167       }
3168    }
3169    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3170       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3171          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3172       }
3173    }
3174 }
3175 
/** Per-channel operation with four source operands; the result goes to dst. */
typedef void (*micro_quaternary_op)(union tgsi_exec_channel *dst,
                                    const union tgsi_exec_channel *src0,
                                    const union tgsi_exec_channel *src1,
                                    const union tgsi_exec_channel *src2,
                                    const union tgsi_exec_channel *src3);
3181 
3182 static void
exec_vector_quaternary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_quaternary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3183 exec_vector_quaternary(struct tgsi_exec_machine *mach,
3184                        const struct tgsi_full_instruction *inst,
3185                        micro_quaternary_op op,
3186                        enum tgsi_exec_datatype dst_datatype,
3187                        enum tgsi_exec_datatype src_datatype)
3188 {
3189    unsigned int chan;
3190    struct tgsi_exec_vector dst;
3191 
3192    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3193       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3194          union tgsi_exec_channel src[4];
3195 
3196          fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3197          fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3198          fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3199          fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3200          op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3201       }
3202    }
3203    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3204       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3205          store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3206       }
3207    }
3208 }
3209 
3210 static void
exec_dp3(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3211 exec_dp3(struct tgsi_exec_machine *mach,
3212          const struct tgsi_full_instruction *inst)
3213 {
3214    unsigned int chan;
3215    union tgsi_exec_channel arg[3];
3216 
3217    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3218    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3219    micro_mul(&arg[2], &arg[0], &arg[1]);
3220 
3221    for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3222       fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3223       fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3224       micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3225    }
3226 
3227    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3228       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3229          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3230       }
3231    }
3232 }
3233 
3234 static void
exec_dp4(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3235 exec_dp4(struct tgsi_exec_machine *mach,
3236          const struct tgsi_full_instruction *inst)
3237 {
3238    unsigned int chan;
3239    union tgsi_exec_channel arg[3];
3240 
3241    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3242    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3243    micro_mul(&arg[2], &arg[0], &arg[1]);
3244 
3245    for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3246       fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3247       fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3248       micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3249    }
3250 
3251    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3252       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3253          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3254       }
3255    }
3256 }
3257 
3258 static void
exec_dp2a(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3259 exec_dp2a(struct tgsi_exec_machine *mach,
3260           const struct tgsi_full_instruction *inst)
3261 {
3262    unsigned int chan;
3263    union tgsi_exec_channel arg[3];
3264 
3265    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3266    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3267    micro_mul(&arg[2], &arg[0], &arg[1]);
3268 
3269    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3270    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3271    micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
3272 
3273    fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3274    micro_add(&arg[0], &arg[0], &arg[1]);
3275 
3276    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3277       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3278          store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3279       }
3280    }
3281 }
3282 
3283 static void
exec_dph(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3284 exec_dph(struct tgsi_exec_machine *mach,
3285          const struct tgsi_full_instruction *inst)
3286 {
3287    unsigned int chan;
3288    union tgsi_exec_channel arg[3];
3289 
3290    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3291    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3292    micro_mul(&arg[2], &arg[0], &arg[1]);
3293 
3294    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3295    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3296    micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3297 
3298    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3299    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3300    micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
3301 
3302    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3303    micro_add(&arg[0], &arg[0], &arg[1]);
3304 
3305    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3306       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3307          store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3308       }
3309    }
3310 }
3311 
3312 static void
exec_dp2(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3313 exec_dp2(struct tgsi_exec_machine *mach,
3314          const struct tgsi_full_instruction *inst)
3315 {
3316    unsigned int chan;
3317    union tgsi_exec_channel arg[3];
3318 
3319    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3320    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3321    micro_mul(&arg[2], &arg[0], &arg[1]);
3322 
3323    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3324    fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3325    micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3326 
3327    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3328       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3329          store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3330       }
3331    }
3332 }
3333 
3334 static void
exec_pk2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3335 exec_pk2h(struct tgsi_exec_machine *mach,
3336           const struct tgsi_full_instruction *inst)
3337 {
3338    unsigned chan;
3339    union tgsi_exec_channel arg[2], dst;
3340 
3341    fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3342    fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3343    for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3344       dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
3345          (util_float_to_half(arg[1].f[chan]) << 16);
3346    }
3347    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3348       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3349          store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
3350       }
3351    }
3352 }
3353 
3354 static void
exec_up2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3355 exec_up2h(struct tgsi_exec_machine *mach,
3356           const struct tgsi_full_instruction *inst)
3357 {
3358    unsigned chan;
3359    union tgsi_exec_channel arg, dst[2];
3360 
3361    fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3362    for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3363       dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
3364       dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
3365    }
3366    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3367       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3368          store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3369       }
3370    }
3371 }
3372 
3373 static void
exec_scs(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3374 exec_scs(struct tgsi_exec_machine *mach,
3375          const struct tgsi_full_instruction *inst)
3376 {
3377    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
3378       union tgsi_exec_channel arg;
3379       union tgsi_exec_channel result;
3380 
3381       fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3382 
3383       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3384          micro_cos(&result, &arg);
3385          store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3386       }
3387       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3388          micro_sin(&result, &arg);
3389          store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3390       }
3391    }
3392    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3393       store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3394    }
3395    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3396       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3397    }
3398 }
3399 
3400 static void
exec_xpd(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3401 exec_xpd(struct tgsi_exec_machine *mach,
3402          const struct tgsi_full_instruction *inst)
3403 {
3404    union tgsi_exec_channel r[6];
3405    union tgsi_exec_channel d[3];
3406 
3407    fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3408    fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3409 
3410    micro_mul(&r[2], &r[0], &r[1]);
3411 
3412    fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3413    fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3414 
3415    micro_mul(&r[5], &r[3], &r[4] );
3416    micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
3417 
3418    fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3419 
3420    micro_mul(&r[3], &r[3], &r[2]);
3421 
3422    fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3423 
3424    micro_mul(&r[1], &r[1], &r[5]);
3425    micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
3426 
3427    micro_mul(&r[5], &r[5], &r[4]);
3428    micro_mul(&r[0], &r[0], &r[2]);
3429    micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
3430 
3431    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3432       store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3433    }
3434    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3435       store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3436    }
3437    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3438       store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3439    }
3440    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3441       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3442    }
3443 }
3444 
3445 static void
exec_dst(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3446 exec_dst(struct tgsi_exec_machine *mach,
3447          const struct tgsi_full_instruction *inst)
3448 {
3449    union tgsi_exec_channel r[2];
3450    union tgsi_exec_channel d[4];
3451 
3452    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3453       fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3454       fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3455       micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3456    }
3457    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3458       fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3459    }
3460    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3461       fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3462    }
3463 
3464    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3465       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3466    }
3467    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3468       store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3469    }
3470    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3471       store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3472    }
3473    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3474       store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3475    }
3476 }
3477 
3478 static void
exec_log(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3479 exec_log(struct tgsi_exec_machine *mach,
3480          const struct tgsi_full_instruction *inst)
3481 {
3482    union tgsi_exec_channel r[3];
3483 
3484    fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3485    micro_abs(&r[2], &r[0]);  /* r2 = abs(r0) */
3486    micro_lg2(&r[1], &r[2]);  /* r1 = lg2(r2) */
3487    micro_flr(&r[0], &r[1]);  /* r0 = floor(r1) */
3488    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3489       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3490    }
3491    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3492       micro_exp2(&r[0], &r[0]);       /* r0 = 2 ^ r0 */
3493       micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3494       store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3495    }
3496    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3497       store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3498    }
3499    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3500       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3501    }
3502 }
3503 
3504 static void
exec_exp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3505 exec_exp(struct tgsi_exec_machine *mach,
3506          const struct tgsi_full_instruction *inst)
3507 {
3508    union tgsi_exec_channel r[3];
3509 
3510    fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3511    micro_flr(&r[1], &r[0]);  /* r1 = floor(r0) */
3512    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3513       micro_exp2(&r[2], &r[1]);       /* r2 = 2 ^ r1 */
3514       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3515    }
3516    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3517       micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3518       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3519    }
3520    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3521       micro_exp2(&r[2], &r[0]);       /* r2 = 2 ^ r0 */
3522       store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3523    }
3524    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3525       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3526    }
3527 }
3528 
3529 static void
exec_lit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3530 exec_lit(struct tgsi_exec_machine *mach,
3531          const struct tgsi_full_instruction *inst)
3532 {
3533    union tgsi_exec_channel r[3];
3534    union tgsi_exec_channel d[3];
3535 
3536    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3537       fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3538       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3539          fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3540          micro_max(&r[1], &r[1], &ZeroVec);
3541 
3542          fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3543          micro_min(&r[2], &r[2], &P128Vec);
3544          micro_max(&r[2], &r[2], &M128Vec);
3545          micro_pow(&r[1], &r[1], &r[2]);
3546          micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3547          store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3548       }
3549       if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3550          micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3551          store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3552       }
3553    }
3554    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3555       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3556    }
3557 
3558    if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3559       store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3560    }
3561 }
3562 
3563 static void
exec_break(struct tgsi_exec_machine * mach)3564 exec_break(struct tgsi_exec_machine *mach)
3565 {
3566    if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3567       /* turn off loop channels for each enabled exec channel */
3568       mach->LoopMask &= ~mach->ExecMask;
3569       /* Todo: if mach->LoopMask == 0, jump to end of loop */
3570       UPDATE_EXEC_MASK(mach);
3571    } else {
3572       assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3573 
3574       mach->Switch.mask = 0x0;
3575 
3576       UPDATE_EXEC_MASK(mach);
3577    }
3578 }
3579 
3580 static void
exec_switch(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3581 exec_switch(struct tgsi_exec_machine *mach,
3582             const struct tgsi_full_instruction *inst)
3583 {
3584    assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3585    assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3586 
3587    mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3588    fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3589    mach->Switch.mask = 0x0;
3590    mach->Switch.defaultMask = 0x0;
3591 
3592    mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3593    mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3594 
3595    UPDATE_EXEC_MASK(mach);
3596 }
3597 
3598 static void
exec_case(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3599 exec_case(struct tgsi_exec_machine *mach,
3600           const struct tgsi_full_instruction *inst)
3601 {
3602    uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3603    union tgsi_exec_channel src;
3604    uint mask = 0;
3605 
3606    fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3607 
3608    if (mach->Switch.selector.u[0] == src.u[0]) {
3609       mask |= 0x1;
3610    }
3611    if (mach->Switch.selector.u[1] == src.u[1]) {
3612       mask |= 0x2;
3613    }
3614    if (mach->Switch.selector.u[2] == src.u[2]) {
3615       mask |= 0x4;
3616    }
3617    if (mach->Switch.selector.u[3] == src.u[3]) {
3618       mask |= 0x8;
3619    }
3620 
3621    mach->Switch.defaultMask |= mask;
3622 
3623    mach->Switch.mask |= mask & prevMask;
3624 
3625    UPDATE_EXEC_MASK(mach);
3626 }
3627 
3628 /* FIXME: this will only work if default is last */
3629 static void
exec_default(struct tgsi_exec_machine * mach)3630 exec_default(struct tgsi_exec_machine *mach)
3631 {
3632    uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3633 
3634    mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3635 
3636    UPDATE_EXEC_MASK(mach);
3637 }
3638 
3639 static void
exec_endswitch(struct tgsi_exec_machine * mach)3640 exec_endswitch(struct tgsi_exec_machine *mach)
3641 {
3642    mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3643    mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3644 
3645    UPDATE_EXEC_MASK(mach);
3646 }
3647 
/**
 * Operation producing a double channel from double-channel source(s).
 * `src` points at the operand list: exec_double_unary() passes the address
 * of a single channel, while exec_double_binary() and exec_double_trinary()
 * pass arrays of two and three channels respectively.
 */
typedef void (* micro_dop)(union tgsi_double_channel *dst,
                           const union tgsi_double_channel *src);

/**
 * Operation producing a double channel from one double-channel source and
 * one tgsi_exec_channel source (see exec_arg0_64_arg1_32()).
 */
typedef void (* micro_dop_sop)(union tgsi_double_channel *dst,
                               const union tgsi_double_channel *src0,
                               union tgsi_exec_channel *src1);

/**
 * Operation producing a double channel from a tgsi_exec_channel source.
 */
typedef void (* micro_dop_s)(union tgsi_double_channel *dst,
                             const union tgsi_exec_channel *src);

/**
 * Operation producing a tgsi_exec_channel from a double-channel source.
 */
typedef void (* micro_sop_d)(union tgsi_exec_channel *dst,
                             const union tgsi_double_channel *src);
3660 
3661 static void
fetch_double_channel(struct tgsi_exec_machine * mach,union tgsi_double_channel * chan,const struct tgsi_full_src_register * reg,uint chan_0,uint chan_1)3662 fetch_double_channel(struct tgsi_exec_machine *mach,
3663                      union tgsi_double_channel *chan,
3664                      const struct tgsi_full_src_register *reg,
3665                      uint chan_0,
3666                      uint chan_1)
3667 {
3668    union tgsi_exec_channel src[2];
3669    uint i;
3670 
3671    fetch_source_d(mach, &src[0], reg, chan_0, TGSI_EXEC_DATA_UINT);
3672    fetch_source_d(mach, &src[1], reg, chan_1, TGSI_EXEC_DATA_UINT);
3673 
3674    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3675       chan->u[i][0] = src[0].u[i];
3676       chan->u[i][1] = src[1].u[i];
3677    }
3678    if (reg->Register.Absolute) {
3679       micro_dabs(chan, chan);
3680    }
3681    if (reg->Register.Negate) {
3682       micro_dneg(chan, chan);
3683    }
3684 }
3685 
3686 static void
store_double_channel(struct tgsi_exec_machine * mach,const union tgsi_double_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_0,uint chan_1)3687 store_double_channel(struct tgsi_exec_machine *mach,
3688                      const union tgsi_double_channel *chan,
3689                      const struct tgsi_full_dst_register *reg,
3690                      const struct tgsi_full_instruction *inst,
3691                      uint chan_0,
3692                      uint chan_1)
3693 {
3694    union tgsi_exec_channel dst[2];
3695    uint i;
3696    union tgsi_double_channel temp;
3697    const uint execmask = mach->ExecMask;
3698 
3699    if (!inst->Instruction.Saturate) {
3700       for (i = 0; i < TGSI_QUAD_SIZE; i++)
3701          if (execmask & (1 << i)) {
3702             dst[0].u[i] = chan->u[i][0];
3703             dst[1].u[i] = chan->u[i][1];
3704          }
3705    }
3706    else {
3707       for (i = 0; i < TGSI_QUAD_SIZE; i++)
3708          if (execmask & (1 << i)) {
3709             if (chan->d[i] < 0.0)
3710                temp.d[i] = 0.0;
3711             else if (chan->d[i] > 1.0)
3712                temp.d[i] = 1.0;
3713             else
3714                temp.d[i] = chan->d[i];
3715 
3716             dst[0].u[i] = temp.u[i][0];
3717             dst[1].u[i] = temp.u[i][1];
3718          }
3719    }
3720 
3721    store_dest_double(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT);
3722    if (chan_1 != -1)
3723       store_dest_double(mach, &dst[1], reg, inst, chan_1, TGSI_EXEC_DATA_UINT);
3724 }
3725 
3726 static void
exec_double_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3727 exec_double_unary(struct tgsi_exec_machine *mach,
3728                   const struct tgsi_full_instruction *inst,
3729                   micro_dop op)
3730 {
3731    union tgsi_double_channel src;
3732    union tgsi_double_channel dst;
3733 
3734    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3735       fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3736       op(&dst, &src);
3737       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3738    }
3739    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3740       fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3741       op(&dst, &src);
3742       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3743    }
3744 }
3745 
3746 static void
exec_double_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op,enum tgsi_exec_datatype dst_datatype)3747 exec_double_binary(struct tgsi_exec_machine *mach,
3748                    const struct tgsi_full_instruction *inst,
3749                    micro_dop op,
3750                    enum tgsi_exec_datatype dst_datatype)
3751 {
3752    union tgsi_double_channel src[2];
3753    union tgsi_double_channel dst;
3754    int first_dest_chan, second_dest_chan;
3755    int wmask;
3756 
3757    wmask = inst->Dst[0].Register.WriteMask;
3758    /* these are & because of the way DSLT etc store their destinations */
3759    if (wmask & TGSI_WRITEMASK_XY) {
3760       first_dest_chan = TGSI_CHAN_X;
3761       second_dest_chan = TGSI_CHAN_Y;
3762       if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3763          first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3764          second_dest_chan = -1;
3765       }
3766 
3767       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3768       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3769       op(&dst, src);
3770       store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3771    }
3772 
3773    if (wmask & TGSI_WRITEMASK_ZW) {
3774       first_dest_chan = TGSI_CHAN_Z;
3775       second_dest_chan = TGSI_CHAN_W;
3776       if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3777          first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3778          second_dest_chan = -1;
3779       }
3780 
3781       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3782       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3783       op(&dst, src);
3784       store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3785    }
3786 }
3787 
3788 static void
exec_double_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3789 exec_double_trinary(struct tgsi_exec_machine *mach,
3790                     const struct tgsi_full_instruction *inst,
3791                     micro_dop op)
3792 {
3793    union tgsi_double_channel src[3];
3794    union tgsi_double_channel dst;
3795 
3796    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3797       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3798       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3799       fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3800       op(&dst, src);
3801       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3802    }
3803    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3804       fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3805       fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3806       fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3807       op(&dst, src);
3808       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3809    }
3810 }
3811 
3812 static void
exec_dldexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3813 exec_dldexp(struct tgsi_exec_machine *mach,
3814             const struct tgsi_full_instruction *inst)
3815 {
3816    union tgsi_double_channel src0;
3817    union tgsi_exec_channel src1;
3818    union tgsi_double_channel dst;
3819    int wmask;
3820 
3821    wmask = inst->Dst[0].Register.WriteMask;
3822    if (wmask & TGSI_WRITEMASK_XY) {
3823       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3824       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3825       micro_dldexp(&dst, &src0, &src1);
3826       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3827    }
3828 
3829    if (wmask & TGSI_WRITEMASK_ZW) {
3830       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3831       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3832       micro_dldexp(&dst, &src0, &src1);
3833       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3834    }
3835 }
3836 
3837 static void
exec_dfracexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3838 exec_dfracexp(struct tgsi_exec_machine *mach,
3839               const struct tgsi_full_instruction *inst)
3840 {
3841    union tgsi_double_channel src;
3842    union tgsi_double_channel dst;
3843    union tgsi_exec_channel dst_exp;
3844 
3845    if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
3846       fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3847       micro_dfracexp(&dst, &dst_exp, &src);
3848       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3849       store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
3850    }
3851    if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
3852       fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3853       micro_dfracexp(&dst, &dst_exp, &src);
3854       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3855       store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
3856    }
3857 }
3858 
3859 static void
exec_arg0_64_arg1_32(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_sop op)3860 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3861             const struct tgsi_full_instruction *inst,
3862             micro_dop_sop op)
3863 {
3864    union tgsi_double_channel src0;
3865    union tgsi_exec_channel src1;
3866    union tgsi_double_channel dst;
3867    int wmask;
3868 
3869    wmask = inst->Dst[0].Register.WriteMask;
3870    if (wmask & TGSI_WRITEMASK_XY) {
3871       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3872       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3873       op(&dst, &src0, &src1);
3874       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3875    }
3876 
3877    if (wmask & TGSI_WRITEMASK_ZW) {
3878       fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3879       fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3880       op(&dst, &src0, &src1);
3881       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3882    }
3883 }
3884 
3885 static int
get_image_coord_dim(unsigned tgsi_tex)3886 get_image_coord_dim(unsigned tgsi_tex)
3887 {
3888    int dim;
3889    switch (tgsi_tex) {
3890    case TGSI_TEXTURE_BUFFER:
3891    case TGSI_TEXTURE_1D:
3892       dim = 1;
3893       break;
3894    case TGSI_TEXTURE_2D:
3895    case TGSI_TEXTURE_RECT:
3896    case TGSI_TEXTURE_1D_ARRAY:
3897    case TGSI_TEXTURE_2D_MSAA:
3898       dim = 2;
3899       break;
3900    case TGSI_TEXTURE_3D:
3901    case TGSI_TEXTURE_CUBE:
3902    case TGSI_TEXTURE_2D_ARRAY:
3903    case TGSI_TEXTURE_2D_ARRAY_MSAA:
3904    case TGSI_TEXTURE_CUBE_ARRAY:
3905       dim = 3;
3906       break;
3907    default:
3908       assert(!"unknown texture target");
3909       dim = 0;
3910       break;
3911    }
3912 
3913    return dim;
3914 }
3915 
3916 static int
get_image_coord_sample(unsigned tgsi_tex)3917 get_image_coord_sample(unsigned tgsi_tex)
3918 {
3919    int sample = 0;
3920    switch (tgsi_tex) {
3921    case TGSI_TEXTURE_2D_MSAA:
3922       sample = 3;
3923       break;
3924    case TGSI_TEXTURE_2D_ARRAY_MSAA:
3925       sample = 4;
3926       break;
3927    default:
3928       break;
3929    }
3930    return sample;
3931 }
3932 
3933 static void
exec_load_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3934 exec_load_img(struct tgsi_exec_machine *mach,
3935               const struct tgsi_full_instruction *inst)
3936 {
3937    union tgsi_exec_channel r[4], sample_r;
3938    uint unit;
3939    int sample;
3940    int i, j;
3941    int dim;
3942    uint chan;
3943    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3944    struct tgsi_image_params params;
3945    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3946 
3947    unit = fetch_sampler_unit(mach, inst, 0);
3948    dim = get_image_coord_dim(inst->Memory.Texture);
3949    sample = get_image_coord_sample(inst->Memory.Texture);
3950    assert(dim <= 3);
3951 
3952    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3953    params.unit = unit;
3954    params.tgsi_tex_instr = inst->Memory.Texture;
3955    params.format = inst->Memory.Format;
3956 
3957    for (i = 0; i < dim; i++) {
3958       IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3959    }
3960 
3961    if (sample)
3962       IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3963 
3964    mach->Image->load(mach->Image, &params,
3965                      r[0].i, r[1].i, r[2].i, sample_r.i,
3966                      rgba);
3967    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3968       r[0].f[j] = rgba[0][j];
3969       r[1].f[j] = rgba[1][j];
3970       r[2].f[j] = rgba[2][j];
3971       r[3].f[j] = rgba[3][j];
3972    }
3973    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3974       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3975          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3976       }
3977    }
3978 }
3979 
3980 static void
exec_load_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3981 exec_load_buf(struct tgsi_exec_machine *mach,
3982               const struct tgsi_full_instruction *inst)
3983 {
3984    union tgsi_exec_channel r[4];
3985    uint unit;
3986    int j;
3987    uint chan;
3988    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3989    struct tgsi_buffer_params params;
3990    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3991 
3992    unit = fetch_sampler_unit(mach, inst, 0);
3993 
3994    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3995    params.unit = unit;
3996    IFETCH(&r[0], 1, TGSI_CHAN_X);
3997 
3998    mach->Buffer->load(mach->Buffer, &params,
3999                       r[0].i, rgba);
4000    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4001       r[0].f[j] = rgba[0][j];
4002       r[1].f[j] = rgba[1][j];
4003       r[2].f[j] = rgba[2][j];
4004       r[3].f[j] = rgba[3][j];
4005    }
4006    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4007       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4008          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4009       }
4010    }
4011 }
4012 
4013 static void
exec_load_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4014 exec_load_mem(struct tgsi_exec_machine *mach,
4015               const struct tgsi_full_instruction *inst)
4016 {
4017    union tgsi_exec_channel r[4];
4018    uint chan;
4019    char *ptr = mach->LocalMem;
4020    uint32_t offset;
4021    int j;
4022 
4023    IFETCH(&r[0], 1, TGSI_CHAN_X);
4024    if (r[0].u[0] >= mach->LocalMemSize)
4025       return;
4026 
4027    offset = r[0].u[0];
4028    ptr += offset;
4029 
4030    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4031       for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4032          if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4033             memcpy(&r[chan].u[j], ptr + (4 * chan), 4);
4034          }
4035       }
4036    }
4037 
4038    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4039       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4040          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4041       }
4042    }
4043 }
4044 
4045 static void
exec_load(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4046 exec_load(struct tgsi_exec_machine *mach,
4047           const struct tgsi_full_instruction *inst)
4048 {
4049    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4050       exec_load_img(mach, inst);
4051    else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4052       exec_load_buf(mach, inst);
4053    else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4054       exec_load_mem(mach, inst);
4055 }
4056 
4057 static void
exec_store_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4058 exec_store_img(struct tgsi_exec_machine *mach,
4059                const struct tgsi_full_instruction *inst)
4060 {
4061    union tgsi_exec_channel r[3], sample_r;
4062    union tgsi_exec_channel value[4];
4063    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4064    struct tgsi_image_params params;
4065    int dim;
4066    int sample;
4067    int i, j;
4068    uint unit;
4069    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4070    unit = inst->Dst[0].Register.Index;
4071    dim = get_image_coord_dim(inst->Memory.Texture);
4072    sample = get_image_coord_sample(inst->Memory.Texture);
4073    assert(dim <= 3);
4074 
4075    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4076    params.unit = unit;
4077    params.tgsi_tex_instr = inst->Memory.Texture;
4078    params.format = inst->Memory.Format;
4079 
4080    for (i = 0; i < dim; i++) {
4081       IFETCH(&r[i], 0, TGSI_CHAN_X + i);
4082    }
4083 
4084    for (i = 0; i < 4; i++) {
4085       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4086    }
4087    if (sample)
4088       IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
4089 
4090    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4091       rgba[0][j] = value[0].f[j];
4092       rgba[1][j] = value[1].f[j];
4093       rgba[2][j] = value[2].f[j];
4094       rgba[3][j] = value[3].f[j];
4095    }
4096 
4097    mach->Image->store(mach->Image, &params,
4098                       r[0].i, r[1].i, r[2].i, sample_r.i,
4099                       rgba);
4100 }
4101 
4102 static void
exec_store_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4103 exec_store_buf(struct tgsi_exec_machine *mach,
4104                const struct tgsi_full_instruction *inst)
4105 {
4106    union tgsi_exec_channel r[3];
4107    union tgsi_exec_channel value[4];
4108    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4109    struct tgsi_buffer_params params;
4110    int i, j;
4111    uint unit;
4112    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4113 
4114    unit = inst->Dst[0].Register.Index;
4115 
4116    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4117    params.unit = unit;
4118    params.writemask = inst->Dst[0].Register.WriteMask;
4119 
4120    IFETCH(&r[0], 0, TGSI_CHAN_X);
4121    for (i = 0; i < 4; i++) {
4122       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4123    }
4124 
4125    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4126       rgba[0][j] = value[0].f[j];
4127       rgba[1][j] = value[1].f[j];
4128       rgba[2][j] = value[2].f[j];
4129       rgba[3][j] = value[3].f[j];
4130    }
4131 
4132    mach->Buffer->store(mach->Buffer, &params,
4133                       r[0].i,
4134                       rgba);
4135 }
4136 
4137 static void
exec_store_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4138 exec_store_mem(struct tgsi_exec_machine *mach,
4139                const struct tgsi_full_instruction *inst)
4140 {
4141    union tgsi_exec_channel r[3];
4142    union tgsi_exec_channel value[4];
4143    uint i, chan;
4144    char *ptr = mach->LocalMem;
4145    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4146    int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4147 
4148    IFETCH(&r[0], 0, TGSI_CHAN_X);
4149 
4150    for (i = 0; i < 4; i++) {
4151       FETCH(&value[i], 1, TGSI_CHAN_X + i);
4152    }
4153 
4154    if (r[0].u[0] >= mach->LocalMemSize)
4155       return;
4156    ptr += r[0].u[0];
4157 
4158    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4159       if (execmask & (1 << i)) {
4160          for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4161             if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4162                memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
4163             }
4164          }
4165       }
4166    }
4167 }
4168 
4169 static void
exec_store(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4170 exec_store(struct tgsi_exec_machine *mach,
4171            const struct tgsi_full_instruction *inst)
4172 {
4173    if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
4174       exec_store_img(mach, inst);
4175    else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
4176       exec_store_buf(mach, inst);
4177    else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
4178       exec_store_mem(mach, inst);
4179 }
4180 
4181 static void
exec_atomop_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4182 exec_atomop_img(struct tgsi_exec_machine *mach,
4183                 const struct tgsi_full_instruction *inst)
4184 {
4185    union tgsi_exec_channel r[4], sample_r;
4186    union tgsi_exec_channel value[4], value2[4];
4187    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4188    float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4189    struct tgsi_image_params params;
4190    int dim;
4191    int sample;
4192    int i, j;
4193    uint unit, chan;
4194    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4195    unit = fetch_sampler_unit(mach, inst, 0);
4196    dim = get_image_coord_dim(inst->Memory.Texture);
4197    sample = get_image_coord_sample(inst->Memory.Texture);
4198    assert(dim <= 3);
4199 
4200    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4201    params.unit = unit;
4202    params.tgsi_tex_instr = inst->Memory.Texture;
4203    params.format = inst->Memory.Format;
4204 
4205    for (i = 0; i < dim; i++) {
4206       IFETCH(&r[i], 1, TGSI_CHAN_X + i);
4207    }
4208 
4209    for (i = 0; i < 4; i++) {
4210       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4211       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4212          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4213    }
4214    if (sample)
4215       IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
4216 
4217    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4218       rgba[0][j] = value[0].f[j];
4219       rgba[1][j] = value[1].f[j];
4220       rgba[2][j] = value[2].f[j];
4221       rgba[3][j] = value[3].f[j];
4222    }
4223    if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4224       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4225          rgba2[0][j] = value2[0].f[j];
4226          rgba2[1][j] = value2[1].f[j];
4227          rgba2[2][j] = value2[2].f[j];
4228          rgba2[3][j] = value2[3].f[j];
4229       }
4230    }
4231 
4232    mach->Image->op(mach->Image, &params, inst->Instruction.Opcode,
4233                    r[0].i, r[1].i, r[2].i, sample_r.i,
4234                    rgba, rgba2);
4235 
4236    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4237       r[0].f[j] = rgba[0][j];
4238       r[1].f[j] = rgba[1][j];
4239       r[2].f[j] = rgba[2][j];
4240       r[3].f[j] = rgba[3][j];
4241    }
4242    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4243       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4244          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4245       }
4246    }
4247 }
4248 
4249 static void
exec_atomop_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4250 exec_atomop_buf(struct tgsi_exec_machine *mach,
4251                 const struct tgsi_full_instruction *inst)
4252 {
4253    union tgsi_exec_channel r[4];
4254    union tgsi_exec_channel value[4], value2[4];
4255    float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4256    float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4257    struct tgsi_buffer_params params;
4258    int i, j;
4259    uint unit, chan;
4260    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4261 
4262    unit = fetch_sampler_unit(mach, inst, 0);
4263 
4264    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4265    params.unit = unit;
4266    params.writemask = inst->Dst[0].Register.WriteMask;
4267 
4268    IFETCH(&r[0], 1, TGSI_CHAN_X);
4269 
4270    for (i = 0; i < 4; i++) {
4271       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4272       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4273          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4274    }
4275 
4276    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4277       rgba[0][j] = value[0].f[j];
4278       rgba[1][j] = value[1].f[j];
4279       rgba[2][j] = value[2].f[j];
4280       rgba[3][j] = value[3].f[j];
4281    }
4282    if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4283       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4284          rgba2[0][j] = value2[0].f[j];
4285          rgba2[1][j] = value2[1].f[j];
4286          rgba2[2][j] = value2[2].f[j];
4287          rgba2[3][j] = value2[3].f[j];
4288       }
4289    }
4290 
4291    mach->Buffer->op(mach->Buffer, &params, inst->Instruction.Opcode,
4292                    r[0].i,
4293                    rgba, rgba2);
4294 
4295    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4296       r[0].f[j] = rgba[0][j];
4297       r[1].f[j] = rgba[1][j];
4298       r[2].f[j] = rgba[2][j];
4299       r[3].f[j] = rgba[3][j];
4300    }
4301    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4302       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4303          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4304       }
4305    }
4306 }
4307 
4308 static void
exec_atomop_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4309 exec_atomop_mem(struct tgsi_exec_machine *mach,
4310                 const struct tgsi_full_instruction *inst)
4311 {
4312    union tgsi_exec_channel r[4];
4313    union tgsi_exec_channel value[4], value2[4];
4314    char *ptr = mach->LocalMem;
4315    uint32_t val;
4316    uint chan, i;
4317    uint32_t offset;
4318    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4319    int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4320    IFETCH(&r[0], 1, TGSI_CHAN_X);
4321 
4322    if (r[0].u[0] >= mach->LocalMemSize)
4323       return;
4324 
4325    offset = r[0].u[0];
4326    ptr += offset;
4327    for (i = 0; i < 4; i++) {
4328       FETCH(&value[i], 2, TGSI_CHAN_X + i);
4329       if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4330          FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4331    }
4332 
4333    memcpy(&r[0].u[0], ptr, 4);
4334    val = r[0].u[0];
4335    switch (inst->Instruction.Opcode) {
4336    case TGSI_OPCODE_ATOMUADD:
4337       val += value[0].u[0];
4338       break;
4339    case TGSI_OPCODE_ATOMXOR:
4340       val ^= value[0].u[0];
4341       break;
4342    case TGSI_OPCODE_ATOMOR:
4343       val |= value[0].u[0];
4344       break;
4345    case TGSI_OPCODE_ATOMAND:
4346       val &= value[0].u[0];
4347       break;
4348    case TGSI_OPCODE_ATOMUMIN:
4349       val = MIN2(val, value[0].u[0]);
4350       break;
4351    case TGSI_OPCODE_ATOMUMAX:
4352       val = MAX2(val, value[0].u[0]);
4353       break;
4354    case TGSI_OPCODE_ATOMIMIN:
4355       val = MIN2(r[0].i[0], value[0].i[0]);
4356       break;
4357    case TGSI_OPCODE_ATOMIMAX:
4358       val = MAX2(r[0].i[0], value[0].i[0]);
4359       break;
4360    case TGSI_OPCODE_ATOMXCHG:
4361       val = value[0].i[0];
4362       break;
4363    case TGSI_OPCODE_ATOMCAS:
4364       if (val == value[0].u[0])
4365          val = value2[0].u[0];
4366       break;
4367    default:
4368       break;
4369    }
4370    for (i = 0; i < TGSI_QUAD_SIZE; i++)
4371       if (execmask & (1 << i))
4372          memcpy(ptr, &val, 4);
4373 
4374    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4375       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4376          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4377       }
4378    }
4379 }
4380 
4381 static void
exec_atomop(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4382 exec_atomop(struct tgsi_exec_machine *mach,
4383             const struct tgsi_full_instruction *inst)
4384 {
4385    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4386       exec_atomop_img(mach, inst);
4387    else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4388       exec_atomop_buf(mach, inst);
4389    else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4390       exec_atomop_mem(mach, inst);
4391 }
4392 
4393 static void
exec_resq_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4394 exec_resq_img(struct tgsi_exec_machine *mach,
4395               const struct tgsi_full_instruction *inst)
4396 {
4397    int result[4];
4398    union tgsi_exec_channel r[4];
4399    uint unit;
4400    int i, chan, j;
4401    struct tgsi_image_params params;
4402    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4403 
4404    unit = fetch_sampler_unit(mach, inst, 0);
4405 
4406    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4407    params.unit = unit;
4408    params.tgsi_tex_instr = inst->Memory.Texture;
4409    params.format = inst->Memory.Format;
4410 
4411    mach->Image->get_dims(mach->Image, &params, result);
4412 
4413    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4414       for (j = 0; j < 4; j++) {
4415          r[j].i[i] = result[j];
4416       }
4417    }
4418 
4419    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4420       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4421          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4422                     TGSI_EXEC_DATA_INT);
4423       }
4424    }
4425 }
4426 
4427 static void
exec_resq_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4428 exec_resq_buf(struct tgsi_exec_machine *mach,
4429               const struct tgsi_full_instruction *inst)
4430 {
4431    int result;
4432    union tgsi_exec_channel r[4];
4433    uint unit;
4434    int i, chan;
4435    struct tgsi_buffer_params params;
4436    int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4437 
4438    unit = fetch_sampler_unit(mach, inst, 0);
4439 
4440    params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4441    params.unit = unit;
4442 
4443    mach->Buffer->get_dims(mach->Buffer, &params, &result);
4444 
4445    for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4446       r[0].i[i] = result;
4447    }
4448 
4449    for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4450       if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4451          store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4452                     TGSI_EXEC_DATA_INT);
4453       }
4454    }
4455 }
4456 
4457 static void
exec_resq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4458 exec_resq(struct tgsi_exec_machine *mach,
4459           const struct tgsi_full_instruction *inst)
4460 {
4461    if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4462       exec_resq_img(mach, inst);
4463    else
4464       exec_resq_buf(mach, inst);
4465 }
4466 
4467 static void
micro_f2u64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4468 micro_f2u64(union tgsi_double_channel *dst,
4469             const union tgsi_exec_channel *src)
4470 {
4471    dst->u64[0] = (uint64_t)src->f[0];
4472    dst->u64[1] = (uint64_t)src->f[1];
4473    dst->u64[2] = (uint64_t)src->f[2];
4474    dst->u64[3] = (uint64_t)src->f[3];
4475 }
4476 
4477 static void
micro_f2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4478 micro_f2i64(union tgsi_double_channel *dst,
4479             const union tgsi_exec_channel *src)
4480 {
4481    dst->i64[0] = (int64_t)src->f[0];
4482    dst->i64[1] = (int64_t)src->f[1];
4483    dst->i64[2] = (int64_t)src->f[2];
4484    dst->i64[3] = (int64_t)src->f[3];
4485 }
4486 
4487 static void
micro_u2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4488 micro_u2i64(union tgsi_double_channel *dst,
4489             const union tgsi_exec_channel *src)
4490 {
4491    dst->u64[0] = (uint64_t)src->u[0];
4492    dst->u64[1] = (uint64_t)src->u[1];
4493    dst->u64[2] = (uint64_t)src->u[2];
4494    dst->u64[3] = (uint64_t)src->u[3];
4495 }
4496 
4497 static void
micro_i2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4498 micro_i2i64(union tgsi_double_channel *dst,
4499             const union tgsi_exec_channel *src)
4500 {
4501    dst->i64[0] = (int64_t)src->i[0];
4502    dst->i64[1] = (int64_t)src->i[1];
4503    dst->i64[2] = (int64_t)src->i[2];
4504    dst->i64[3] = (int64_t)src->i[3];
4505 }
4506 
4507 static void
micro_d2u64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4508 micro_d2u64(union tgsi_double_channel *dst,
4509            const union tgsi_double_channel *src)
4510 {
4511    dst->u64[0] = (uint64_t)src->d[0];
4512    dst->u64[1] = (uint64_t)src->d[1];
4513    dst->u64[2] = (uint64_t)src->d[2];
4514    dst->u64[3] = (uint64_t)src->d[3];
4515 }
4516 
4517 static void
micro_d2i64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4518 micro_d2i64(union tgsi_double_channel *dst,
4519            const union tgsi_double_channel *src)
4520 {
4521    dst->i64[0] = (int64_t)src->d[0];
4522    dst->i64[1] = (int64_t)src->d[1];
4523    dst->i64[2] = (int64_t)src->d[2];
4524    dst->i64[3] = (int64_t)src->d[3];
4525 }
4526 
4527 static void
micro_u642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4528 micro_u642d(union tgsi_double_channel *dst,
4529            const union tgsi_double_channel *src)
4530 {
4531    dst->d[0] = (double)src->u64[0];
4532    dst->d[1] = (double)src->u64[1];
4533    dst->d[2] = (double)src->u64[2];
4534    dst->d[3] = (double)src->u64[3];
4535 }
4536 
4537 static void
micro_i642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4538 micro_i642d(union tgsi_double_channel *dst,
4539            const union tgsi_double_channel *src)
4540 {
4541    dst->d[0] = (double)src->i64[0];
4542    dst->d[1] = (double)src->i64[1];
4543    dst->d[2] = (double)src->i64[2];
4544    dst->d[3] = (double)src->i64[3];
4545 }
4546 
4547 static void
micro_u642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4548 micro_u642f(union tgsi_exec_channel *dst,
4549             const union tgsi_double_channel *src)
4550 {
4551    dst->f[0] = (float)src->u64[0];
4552    dst->f[1] = (float)src->u64[1];
4553    dst->f[2] = (float)src->u64[2];
4554    dst->f[3] = (float)src->u64[3];
4555 }
4556 
4557 static void
micro_i642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4558 micro_i642f(union tgsi_exec_channel *dst,
4559             const union tgsi_double_channel *src)
4560 {
4561    dst->f[0] = (float)src->i64[0];
4562    dst->f[1] = (float)src->i64[1];
4563    dst->f[2] = (float)src->i64[2];
4564    dst->f[3] = (float)src->i64[3];
4565 }
4566 
4567 static void
exec_t_2_64(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_s op,enum tgsi_exec_datatype src_datatype)4568 exec_t_2_64(struct tgsi_exec_machine *mach,
4569           const struct tgsi_full_instruction *inst,
4570           micro_dop_s op,
4571           enum tgsi_exec_datatype src_datatype)
4572 {
4573    union tgsi_exec_channel src;
4574    union tgsi_double_channel dst;
4575 
4576    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4577       fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4578       op(&dst, &src);
4579       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4580    }
4581    if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4582       fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4583       op(&dst, &src);
4584       store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4585    }
4586 }
4587 
4588 static void
exec_64_2_t(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_sop_d op,enum tgsi_exec_datatype dst_datatype)4589 exec_64_2_t(struct tgsi_exec_machine *mach,
4590             const struct tgsi_full_instruction *inst,
4591             micro_sop_d op,
4592             enum tgsi_exec_datatype dst_datatype)
4593 {
4594    union tgsi_double_channel src;
4595    union tgsi_exec_channel dst;
4596    int wm = inst->Dst[0].Register.WriteMask;
4597    int i;
4598    int bit;
4599    for (i = 0; i < 2; i++) {
4600       bit = ffs(wm);
4601       if (bit) {
4602          wm &= ~(1 << (bit - 1));
4603          if (i == 0)
4604             fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4605          else
4606             fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4607          op(&dst, &src);
4608          store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
4609       }
4610    }
4611 }
4612 
4613 static void
micro_i2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4614 micro_i2f(union tgsi_exec_channel *dst,
4615           const union tgsi_exec_channel *src)
4616 {
4617    dst->f[0] = (float)src->i[0];
4618    dst->f[1] = (float)src->i[1];
4619    dst->f[2] = (float)src->i[2];
4620    dst->f[3] = (float)src->i[3];
4621 }
4622 
4623 static void
micro_not(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4624 micro_not(union tgsi_exec_channel *dst,
4625           const union tgsi_exec_channel *src)
4626 {
4627    dst->u[0] = ~src->u[0];
4628    dst->u[1] = ~src->u[1];
4629    dst->u[2] = ~src->u[2];
4630    dst->u[3] = ~src->u[3];
4631 }
4632 
4633 static void
micro_shl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4634 micro_shl(union tgsi_exec_channel *dst,
4635           const union tgsi_exec_channel *src0,
4636           const union tgsi_exec_channel *src1)
4637 {
4638    unsigned masked_count;
4639    masked_count = src1->u[0] & 0x1f;
4640    dst->u[0] = src0->u[0] << masked_count;
4641    masked_count = src1->u[1] & 0x1f;
4642    dst->u[1] = src0->u[1] << masked_count;
4643    masked_count = src1->u[2] & 0x1f;
4644    dst->u[2] = src0->u[2] << masked_count;
4645    masked_count = src1->u[3] & 0x1f;
4646    dst->u[3] = src0->u[3] << masked_count;
4647 }
4648 
4649 static void
micro_and(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4650 micro_and(union tgsi_exec_channel *dst,
4651           const union tgsi_exec_channel *src0,
4652           const union tgsi_exec_channel *src1)
4653 {
4654    dst->u[0] = src0->u[0] & src1->u[0];
4655    dst->u[1] = src0->u[1] & src1->u[1];
4656    dst->u[2] = src0->u[2] & src1->u[2];
4657    dst->u[3] = src0->u[3] & src1->u[3];
4658 }
4659 
4660 static void
micro_or(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4661 micro_or(union tgsi_exec_channel *dst,
4662          const union tgsi_exec_channel *src0,
4663          const union tgsi_exec_channel *src1)
4664 {
4665    dst->u[0] = src0->u[0] | src1->u[0];
4666    dst->u[1] = src0->u[1] | src1->u[1];
4667    dst->u[2] = src0->u[2] | src1->u[2];
4668    dst->u[3] = src0->u[3] | src1->u[3];
4669 }
4670 
4671 static void
micro_xor(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4672 micro_xor(union tgsi_exec_channel *dst,
4673           const union tgsi_exec_channel *src0,
4674           const union tgsi_exec_channel *src1)
4675 {
4676    dst->u[0] = src0->u[0] ^ src1->u[0];
4677    dst->u[1] = src0->u[1] ^ src1->u[1];
4678    dst->u[2] = src0->u[2] ^ src1->u[2];
4679    dst->u[3] = src0->u[3] ^ src1->u[3];
4680 }
4681 
4682 static void
micro_mod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4683 micro_mod(union tgsi_exec_channel *dst,
4684           const union tgsi_exec_channel *src0,
4685           const union tgsi_exec_channel *src1)
4686 {
4687    dst->i[0] = src0->i[0] % src1->i[0];
4688    dst->i[1] = src0->i[1] % src1->i[1];
4689    dst->i[2] = src0->i[2] % src1->i[2];
4690    dst->i[3] = src0->i[3] % src1->i[3];
4691 }
4692 
4693 static void
micro_f2i(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4694 micro_f2i(union tgsi_exec_channel *dst,
4695           const union tgsi_exec_channel *src)
4696 {
4697    dst->i[0] = (int)src->f[0];
4698    dst->i[1] = (int)src->f[1];
4699    dst->i[2] = (int)src->f[2];
4700    dst->i[3] = (int)src->f[3];
4701 }
4702 
4703 static void
micro_fseq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4704 micro_fseq(union tgsi_exec_channel *dst,
4705            const union tgsi_exec_channel *src0,
4706            const union tgsi_exec_channel *src1)
4707 {
4708    dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4709    dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4710    dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4711    dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4712 }
4713 
4714 static void
micro_fsge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4715 micro_fsge(union tgsi_exec_channel *dst,
4716            const union tgsi_exec_channel *src0,
4717            const union tgsi_exec_channel *src1)
4718 {
4719    dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4720    dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4721    dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4722    dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4723 }
4724 
4725 static void
micro_fslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4726 micro_fslt(union tgsi_exec_channel *dst,
4727            const union tgsi_exec_channel *src0,
4728            const union tgsi_exec_channel *src1)
4729 {
4730    dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4731    dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4732    dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4733    dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4734 }
4735 
4736 static void
micro_fsne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4737 micro_fsne(union tgsi_exec_channel *dst,
4738            const union tgsi_exec_channel *src0,
4739            const union tgsi_exec_channel *src1)
4740 {
4741    dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4742    dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4743    dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4744    dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4745 }
4746 
4747 static void
micro_idiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4748 micro_idiv(union tgsi_exec_channel *dst,
4749            const union tgsi_exec_channel *src0,
4750            const union tgsi_exec_channel *src1)
4751 {
4752    dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4753    dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4754    dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4755    dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4756 }
4757 
4758 static void
micro_imax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4759 micro_imax(union tgsi_exec_channel *dst,
4760            const union tgsi_exec_channel *src0,
4761            const union tgsi_exec_channel *src1)
4762 {
4763    dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4764    dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4765    dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4766    dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4767 }
4768 
4769 static void
micro_imin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4770 micro_imin(union tgsi_exec_channel *dst,
4771            const union tgsi_exec_channel *src0,
4772            const union tgsi_exec_channel *src1)
4773 {
4774    dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4775    dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4776    dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4777    dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4778 }
4779 
4780 static void
micro_isge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4781 micro_isge(union tgsi_exec_channel *dst,
4782            const union tgsi_exec_channel *src0,
4783            const union tgsi_exec_channel *src1)
4784 {
4785    dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4786    dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4787    dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4788    dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4789 }
4790 
4791 static void
micro_ishr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4792 micro_ishr(union tgsi_exec_channel *dst,
4793            const union tgsi_exec_channel *src0,
4794            const union tgsi_exec_channel *src1)
4795 {
4796    unsigned masked_count;
4797    masked_count = src1->i[0] & 0x1f;
4798    dst->i[0] = src0->i[0] >> masked_count;
4799    masked_count = src1->i[1] & 0x1f;
4800    dst->i[1] = src0->i[1] >> masked_count;
4801    masked_count = src1->i[2] & 0x1f;
4802    dst->i[2] = src0->i[2] >> masked_count;
4803    masked_count = src1->i[3] & 0x1f;
4804    dst->i[3] = src0->i[3] >> masked_count;
4805 }
4806 
4807 static void
micro_islt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4808 micro_islt(union tgsi_exec_channel *dst,
4809            const union tgsi_exec_channel *src0,
4810            const union tgsi_exec_channel *src1)
4811 {
4812    dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4813    dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4814    dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4815    dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4816 }
4817 
4818 static void
micro_f2u(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4819 micro_f2u(union tgsi_exec_channel *dst,
4820           const union tgsi_exec_channel *src)
4821 {
4822    dst->u[0] = (uint)src->f[0];
4823    dst->u[1] = (uint)src->f[1];
4824    dst->u[2] = (uint)src->f[2];
4825    dst->u[3] = (uint)src->f[3];
4826 }
4827 
4828 static void
micro_u2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4829 micro_u2f(union tgsi_exec_channel *dst,
4830           const union tgsi_exec_channel *src)
4831 {
4832    dst->f[0] = (float)src->u[0];
4833    dst->f[1] = (float)src->u[1];
4834    dst->f[2] = (float)src->u[2];
4835    dst->f[3] = (float)src->u[3];
4836 }
4837 
4838 static void
micro_uadd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4839 micro_uadd(union tgsi_exec_channel *dst,
4840            const union tgsi_exec_channel *src0,
4841            const union tgsi_exec_channel *src1)
4842 {
4843    dst->u[0] = src0->u[0] + src1->u[0];
4844    dst->u[1] = src0->u[1] + src1->u[1];
4845    dst->u[2] = src0->u[2] + src1->u[2];
4846    dst->u[3] = src0->u[3] + src1->u[3];
4847 }
4848 
4849 static void
micro_udiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4850 micro_udiv(union tgsi_exec_channel *dst,
4851            const union tgsi_exec_channel *src0,
4852            const union tgsi_exec_channel *src1)
4853 {
4854    dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4855    dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4856    dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4857    dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4858 }
4859 
4860 static void
micro_umad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4861 micro_umad(union tgsi_exec_channel *dst,
4862            const union tgsi_exec_channel *src0,
4863            const union tgsi_exec_channel *src1,
4864            const union tgsi_exec_channel *src2)
4865 {
4866    dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4867    dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4868    dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4869    dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4870 }
4871 
4872 static void
micro_umax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4873 micro_umax(union tgsi_exec_channel *dst,
4874            const union tgsi_exec_channel *src0,
4875            const union tgsi_exec_channel *src1)
4876 {
4877    dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4878    dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4879    dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4880    dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4881 }
4882 
4883 static void
micro_umin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4884 micro_umin(union tgsi_exec_channel *dst,
4885            const union tgsi_exec_channel *src0,
4886            const union tgsi_exec_channel *src1)
4887 {
4888    dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4889    dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4890    dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4891    dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4892 }
4893 
4894 static void
micro_umod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4895 micro_umod(union tgsi_exec_channel *dst,
4896            const union tgsi_exec_channel *src0,
4897            const union tgsi_exec_channel *src1)
4898 {
4899    dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4900    dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4901    dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4902    dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4903 }
4904 
4905 static void
micro_umul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4906 micro_umul(union tgsi_exec_channel *dst,
4907            const union tgsi_exec_channel *src0,
4908            const union tgsi_exec_channel *src1)
4909 {
4910    dst->u[0] = src0->u[0] * src1->u[0];
4911    dst->u[1] = src0->u[1] * src1->u[1];
4912    dst->u[2] = src0->u[2] * src1->u[2];
4913    dst->u[3] = src0->u[3] * src1->u[3];
4914 }
4915 
4916 static void
micro_imul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4917 micro_imul_hi(union tgsi_exec_channel *dst,
4918               const union tgsi_exec_channel *src0,
4919               const union tgsi_exec_channel *src1)
4920 {
4921 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4922    dst->i[0] = I64M(src0->i[0], src1->i[0]);
4923    dst->i[1] = I64M(src0->i[1], src1->i[1]);
4924    dst->i[2] = I64M(src0->i[2], src1->i[2]);
4925    dst->i[3] = I64M(src0->i[3], src1->i[3]);
4926 #undef I64M
4927 }
4928 
4929 static void
micro_umul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4930 micro_umul_hi(union tgsi_exec_channel *dst,
4931               const union tgsi_exec_channel *src0,
4932               const union tgsi_exec_channel *src1)
4933 {
4934 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4935    dst->u[0] = U64M(src0->u[0], src1->u[0]);
4936    dst->u[1] = U64M(src0->u[1], src1->u[1]);
4937    dst->u[2] = U64M(src0->u[2], src1->u[2]);
4938    dst->u[3] = U64M(src0->u[3], src1->u[3]);
4939 #undef U64M
4940 }
4941 
4942 static void
micro_useq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4943 micro_useq(union tgsi_exec_channel *dst,
4944            const union tgsi_exec_channel *src0,
4945            const union tgsi_exec_channel *src1)
4946 {
4947    dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4948    dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4949    dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4950    dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4951 }
4952 
4953 static void
micro_usge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4954 micro_usge(union tgsi_exec_channel *dst,
4955            const union tgsi_exec_channel *src0,
4956            const union tgsi_exec_channel *src1)
4957 {
4958    dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4959    dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4960    dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4961    dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4962 }
4963 
4964 static void
micro_ushr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4965 micro_ushr(union tgsi_exec_channel *dst,
4966            const union tgsi_exec_channel *src0,
4967            const union tgsi_exec_channel *src1)
4968 {
4969    unsigned masked_count;
4970    masked_count = src1->u[0] & 0x1f;
4971    dst->u[0] = src0->u[0] >> masked_count;
4972    masked_count = src1->u[1] & 0x1f;
4973    dst->u[1] = src0->u[1] >> masked_count;
4974    masked_count = src1->u[2] & 0x1f;
4975    dst->u[2] = src0->u[2] >> masked_count;
4976    masked_count = src1->u[3] & 0x1f;
4977    dst->u[3] = src0->u[3] >> masked_count;
4978 }
4979 
4980 static void
micro_uslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4981 micro_uslt(union tgsi_exec_channel *dst,
4982            const union tgsi_exec_channel *src0,
4983            const union tgsi_exec_channel *src1)
4984 {
4985    dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4986    dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4987    dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4988    dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4989 }
4990 
4991 static void
micro_usne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4992 micro_usne(union tgsi_exec_channel *dst,
4993            const union tgsi_exec_channel *src0,
4994            const union tgsi_exec_channel *src1)
4995 {
4996    dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4997    dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4998    dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4999    dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
5000 }
5001 
5002 static void
micro_uarl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5003 micro_uarl(union tgsi_exec_channel *dst,
5004            const union tgsi_exec_channel *src)
5005 {
5006    dst->i[0] = src->u[0];
5007    dst->i[1] = src->u[1];
5008    dst->i[2] = src->u[2];
5009    dst->i[3] = src->u[3];
5010 }
5011 
5012 static void
micro_ucmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5013 micro_ucmp(union tgsi_exec_channel *dst,
5014            const union tgsi_exec_channel *src0,
5015            const union tgsi_exec_channel *src1,
5016            const union tgsi_exec_channel *src2)
5017 {
5018    dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
5019    dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
5020    dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
5021    dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
5022 }
5023 
5024 /**
5025  * Signed bitfield extract (i.e. sign-extend the extracted bits)
5026  */
5027 static void
micro_ibfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5028 micro_ibfe(union tgsi_exec_channel *dst,
5029            const union tgsi_exec_channel *src0,
5030            const union tgsi_exec_channel *src1,
5031            const union tgsi_exec_channel *src2)
5032 {
5033    int i;
5034    for (i = 0; i < 4; i++) {
5035       int width = src2->i[i] & 0x1f;
5036       int offset = src1->i[i] & 0x1f;
5037       if (width == 0)
5038          dst->i[i] = 0;
5039       else if (width + offset < 32)
5040          dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
5041       else
5042          dst->i[i] = src0->i[i] >> offset;
5043    }
5044 }
5045 
5046 /**
5047  * Unsigned bitfield extract
5048  */
5049 static void
micro_ubfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5050 micro_ubfe(union tgsi_exec_channel *dst,
5051            const union tgsi_exec_channel *src0,
5052            const union tgsi_exec_channel *src1,
5053            const union tgsi_exec_channel *src2)
5054 {
5055    int i;
5056    for (i = 0; i < 4; i++) {
5057       int width = src2->u[i] & 0x1f;
5058       int offset = src1->u[i] & 0x1f;
5059       if (width == 0)
5060          dst->u[i] = 0;
5061       else if (width + offset < 32)
5062          dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
5063       else
5064          dst->u[i] = src0->u[i] >> offset;
5065    }
5066 }
5067 
5068 /**
5069  * Bitfield insert: copy low bits from src1 into a region of src0.
5070  */
5071 static void
micro_bfi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)5072 micro_bfi(union tgsi_exec_channel *dst,
5073           const union tgsi_exec_channel *src0,
5074           const union tgsi_exec_channel *src1,
5075           const union tgsi_exec_channel *src2,
5076           const union tgsi_exec_channel *src3)
5077 {
5078    int i;
5079    for (i = 0; i < 4; i++) {
5080       int width = src3->u[i] & 0x1f;
5081       int offset = src2->u[i] & 0x1f;
5082       int bitmask = ((1 << width) - 1) << offset;
5083       dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
5084    }
5085 }
5086 
5087 static void
micro_brev(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5088 micro_brev(union tgsi_exec_channel *dst,
5089            const union tgsi_exec_channel *src)
5090 {
5091    dst->u[0] = util_bitreverse(src->u[0]);
5092    dst->u[1] = util_bitreverse(src->u[1]);
5093    dst->u[2] = util_bitreverse(src->u[2]);
5094    dst->u[3] = util_bitreverse(src->u[3]);
5095 }
5096 
5097 static void
micro_popc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5098 micro_popc(union tgsi_exec_channel *dst,
5099            const union tgsi_exec_channel *src)
5100 {
5101    dst->u[0] = util_bitcount(src->u[0]);
5102    dst->u[1] = util_bitcount(src->u[1]);
5103    dst->u[2] = util_bitcount(src->u[2]);
5104    dst->u[3] = util_bitcount(src->u[3]);
5105 }
5106 
5107 static void
micro_lsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5108 micro_lsb(union tgsi_exec_channel *dst,
5109           const union tgsi_exec_channel *src)
5110 {
5111    dst->i[0] = ffs(src->u[0]) - 1;
5112    dst->i[1] = ffs(src->u[1]) - 1;
5113    dst->i[2] = ffs(src->u[2]) - 1;
5114    dst->i[3] = ffs(src->u[3]) - 1;
5115 }
5116 
5117 static void
micro_imsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5118 micro_imsb(union tgsi_exec_channel *dst,
5119            const union tgsi_exec_channel *src)
5120 {
5121    dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
5122    dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
5123    dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
5124    dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
5125 }
5126 
5127 static void
micro_umsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5128 micro_umsb(union tgsi_exec_channel *dst,
5129            const union tgsi_exec_channel *src)
5130 {
5131    dst->i[0] = util_last_bit(src->u[0]) - 1;
5132    dst->i[1] = util_last_bit(src->u[1]) - 1;
5133    dst->i[2] = util_last_bit(src->u[2]) - 1;
5134    dst->i[3] = util_last_bit(src->u[3]) - 1;
5135 }
5136 
5137 /**
5138  * Execute a TGSI instruction.
5139  * Returns TRUE if a barrier instruction is hit,
5140  * otherwise FALSE.
5141  */
5142 static boolean
exec_instruction(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int * pc)5143 exec_instruction(
5144    struct tgsi_exec_machine *mach,
5145    const struct tgsi_full_instruction *inst,
5146    int *pc )
5147 {
5148    union tgsi_exec_channel r[10];
5149 
5150    (*pc)++;
5151 
5152    switch (inst->Instruction.Opcode) {
5153    case TGSI_OPCODE_ARL:
5154       exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5155       break;
5156 
5157    case TGSI_OPCODE_MOV:
5158       exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5159       break;
5160 
5161    case TGSI_OPCODE_LIT:
5162       exec_lit(mach, inst);
5163       break;
5164 
5165    case TGSI_OPCODE_RCP:
5166       exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5167       break;
5168 
5169    case TGSI_OPCODE_RSQ:
5170       exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5171       break;
5172 
5173    case TGSI_OPCODE_EXP:
5174       exec_exp(mach, inst);
5175       break;
5176 
5177    case TGSI_OPCODE_LOG:
5178       exec_log(mach, inst);
5179       break;
5180 
5181    case TGSI_OPCODE_MUL:
5182       exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5183       break;
5184 
5185    case TGSI_OPCODE_ADD:
5186       exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5187       break;
5188 
5189    case TGSI_OPCODE_DP3:
5190       exec_dp3(mach, inst);
5191       break;
5192 
5193    case TGSI_OPCODE_DP4:
5194       exec_dp4(mach, inst);
5195       break;
5196 
5197    case TGSI_OPCODE_DST:
5198       exec_dst(mach, inst);
5199       break;
5200 
5201    case TGSI_OPCODE_MIN:
5202       exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5203       break;
5204 
5205    case TGSI_OPCODE_MAX:
5206       exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5207       break;
5208 
5209    case TGSI_OPCODE_SLT:
5210       exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5211       break;
5212 
5213    case TGSI_OPCODE_SGE:
5214       exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5215       break;
5216 
5217    case TGSI_OPCODE_MAD:
5218       exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5219       break;
5220 
5221    case TGSI_OPCODE_LRP:
5222       exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5223       break;
5224 
5225    case TGSI_OPCODE_SQRT:
5226       exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5227       break;
5228 
5229    case TGSI_OPCODE_DP2A:
5230       exec_dp2a(mach, inst);
5231       break;
5232 
5233    case TGSI_OPCODE_FRC:
5234       exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5235       break;
5236 
5237    case TGSI_OPCODE_CLAMP:
5238       exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5239       break;
5240 
5241    case TGSI_OPCODE_FLR:
5242       exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5243       break;
5244 
5245    case TGSI_OPCODE_ROUND:
5246       exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5247       break;
5248 
5249    case TGSI_OPCODE_EX2:
5250       exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5251       break;
5252 
5253    case TGSI_OPCODE_LG2:
5254       exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5255       break;
5256 
5257    case TGSI_OPCODE_POW:
5258       exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5259       break;
5260 
5261    case TGSI_OPCODE_XPD:
5262       exec_xpd(mach, inst);
5263       break;
5264 
5265    case TGSI_OPCODE_DPH:
5266       exec_dph(mach, inst);
5267       break;
5268 
5269    case TGSI_OPCODE_COS:
5270       exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5271       break;
5272 
5273    case TGSI_OPCODE_DDX:
5274       exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5275       break;
5276 
5277    case TGSI_OPCODE_DDY:
5278       exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5279       break;
5280 
5281    case TGSI_OPCODE_KILL:
5282       exec_kill (mach, inst);
5283       break;
5284 
5285    case TGSI_OPCODE_KILL_IF:
5286       exec_kill_if (mach, inst);
5287       break;
5288 
5289    case TGSI_OPCODE_PK2H:
5290       exec_pk2h(mach, inst);
5291       break;
5292 
5293    case TGSI_OPCODE_PK2US:
5294       assert (0);
5295       break;
5296 
5297    case TGSI_OPCODE_PK4B:
5298       assert (0);
5299       break;
5300 
5301    case TGSI_OPCODE_PK4UB:
5302       assert (0);
5303       break;
5304 
5305    case TGSI_OPCODE_SEQ:
5306       exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5307       break;
5308 
5309    case TGSI_OPCODE_SGT:
5310       exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5311       break;
5312 
5313    case TGSI_OPCODE_SIN:
5314       exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5315       break;
5316 
5317    case TGSI_OPCODE_SLE:
5318       exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5319       break;
5320 
5321    case TGSI_OPCODE_SNE:
5322       exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5323       break;
5324 
5325    case TGSI_OPCODE_TEX:
5326       /* simple texture lookup */
5327       /* src[0] = texcoord */
5328       /* src[1] = sampler unit */
5329       exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5330       break;
5331 
5332    case TGSI_OPCODE_TXB:
5333       /* Texture lookup with lod bias */
5334       /* src[0] = texcoord (src[0].w = LOD bias) */
5335       /* src[1] = sampler unit */
5336       exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5337       break;
5338 
5339    case TGSI_OPCODE_TXD:
5340       /* Texture lookup with explicit partial derivatives */
5341       /* src[0] = texcoord */
5342       /* src[1] = d[strq]/dx */
5343       /* src[2] = d[strq]/dy */
5344       /* src[3] = sampler unit */
5345       exec_txd(mach, inst);
5346       break;
5347 
5348    case TGSI_OPCODE_TXL:
5349       /* Texture lookup with explicit LOD */
5350       /* src[0] = texcoord (src[0].w = LOD) */
5351       /* src[1] = sampler unit */
5352       exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5353       break;
5354 
5355    case TGSI_OPCODE_TXP:
5356       /* Texture lookup with projection */
5357       /* src[0] = texcoord (src[0].w = projection) */
5358       /* src[1] = sampler unit */
5359       exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5360       break;
5361 
5362    case TGSI_OPCODE_TG4:
5363       /* src[0] = texcoord */
5364       /* src[1] = component */
5365       /* src[2] = sampler unit */
5366       exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5367       break;
5368 
5369    case TGSI_OPCODE_LODQ:
5370       /* src[0] = texcoord */
5371       /* src[1] = sampler unit */
5372       exec_lodq(mach, inst);
5373       break;
5374 
5375    case TGSI_OPCODE_UP2H:
5376       exec_up2h(mach, inst);
5377       break;
5378 
5379    case TGSI_OPCODE_UP2US:
5380       assert (0);
5381       break;
5382 
5383    case TGSI_OPCODE_UP4B:
5384       assert (0);
5385       break;
5386 
5387    case TGSI_OPCODE_UP4UB:
5388       assert (0);
5389       break;
5390 
5391    case TGSI_OPCODE_ARR:
5392       exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5393       break;
5394 
5395    case TGSI_OPCODE_CAL:
5396       /* skip the call if no execution channels are enabled */
5397       if (mach->ExecMask) {
5398          /* do the call */
5399 
5400          /* First, record the depths of the execution stacks.
5401           * This is important for deeply nested/looped return statements.
5402           * We have to unwind the stacks by the correct amount.  For a
5403           * real code generator, we could determine the number of entries
5404           * to pop off each stack with simple static analysis and avoid
5405           * implementing this data structure at run time.
5406           */
5407          mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5408          mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5409          mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5410          mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5411          mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5412          /* note that PC was already incremented above */
5413          mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5414 
5415          mach->CallStackTop++;
5416 
5417          /* Second, push the Cond, Loop, Cont, Switch, Break, Func stacks */
5418          assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5419          assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5420          assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5421          assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5422          assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5423          assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5424 
5425          mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5426          mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5427          mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5428          mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5429          mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5430          mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5431 
5432          /* Finally, jump to the subroutine.  The label is a pointer
5433           * (an instruction number) to the BGNSUB instruction.
5434           */
5435          *pc = inst->Label.Label;
5436          assert(mach->Instructions[*pc].Instruction.Opcode
5437                 == TGSI_OPCODE_BGNSUB);
5438       }
5439       break;
5440 
5441    case TGSI_OPCODE_RET:
5442       mach->FuncMask &= ~mach->ExecMask;
5443       UPDATE_EXEC_MASK(mach);
5444 
5445       if (mach->FuncMask == 0x0) {
5446          /* really return now (otherwise, keep executing) */
5447 
5448          if (mach->CallStackTop == 0) {
5449             /* returning from main() */
5450             mach->CondStackTop = 0;
5451             mach->LoopStackTop = 0;
5452             mach->ContStackTop = 0;
5453             mach->LoopLabelStackTop = 0;
5454             mach->SwitchStackTop = 0;
5455             mach->BreakStackTop = 0;
5456             *pc = -1;
5457             return FALSE;
5458          }
5459 
5460          assert(mach->CallStackTop > 0);
5461          mach->CallStackTop--;
5462 
5463          mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5464          mach->CondMask = mach->CondStack[mach->CondStackTop];
5465 
5466          mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5467          mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5468 
5469          mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5470          mach->ContMask = mach->ContStack[mach->ContStackTop];
5471 
5472          mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5473          mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5474 
5475          mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5476          mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5477 
5478          assert(mach->FuncStackTop > 0);
5479          mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5480 
5481          *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5482 
5483          UPDATE_EXEC_MASK(mach);
5484       }
5485       break;
5486 
5487    case TGSI_OPCODE_SSG:
5488       exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5489       break;
5490 
5491    case TGSI_OPCODE_CMP:
5492       exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5493       break;
5494 
5495    case TGSI_OPCODE_SCS:
5496       exec_scs(mach, inst);
5497       break;
5498 
5499    case TGSI_OPCODE_DIV:
5500       exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5501       break;
5502 
5503    case TGSI_OPCODE_DP2:
5504       exec_dp2(mach, inst);
5505       break;
5506 
5507    case TGSI_OPCODE_IF:
5508       /* push CondMask */
5509       assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5510       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5511       FETCH( &r[0], 0, TGSI_CHAN_X );
5512       /* update CondMask */
5513       if( ! r[0].f[0] ) {
5514          mach->CondMask &= ~0x1;
5515       }
5516       if( ! r[0].f[1] ) {
5517          mach->CondMask &= ~0x2;
5518       }
5519       if( ! r[0].f[2] ) {
5520          mach->CondMask &= ~0x4;
5521       }
5522       if( ! r[0].f[3] ) {
5523          mach->CondMask &= ~0x8;
5524       }
5525       UPDATE_EXEC_MASK(mach);
5526       /* Todo: If CondMask==0, jump to ELSE */
5527       break;
5528 
5529    case TGSI_OPCODE_UIF:
5530       /* push CondMask */
5531       assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5532       mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5533       IFETCH( &r[0], 0, TGSI_CHAN_X );
5534       /* update CondMask */
5535       if( ! r[0].u[0] ) {
5536          mach->CondMask &= ~0x1;
5537       }
5538       if( ! r[0].u[1] ) {
5539          mach->CondMask &= ~0x2;
5540       }
5541       if( ! r[0].u[2] ) {
5542          mach->CondMask &= ~0x4;
5543       }
5544       if( ! r[0].u[3] ) {
5545          mach->CondMask &= ~0x8;
5546       }
5547       UPDATE_EXEC_MASK(mach);
5548       /* Todo: If CondMask==0, jump to ELSE */
5549       break;
5550 
5551    case TGSI_OPCODE_ELSE:
5552       /* invert CondMask wrt previous mask */
5553       {
5554          uint prevMask;
5555          assert(mach->CondStackTop > 0);
5556          prevMask = mach->CondStack[mach->CondStackTop - 1];
5557          mach->CondMask = ~mach->CondMask & prevMask;
5558          UPDATE_EXEC_MASK(mach);
5559          /* Todo: If CondMask==0, jump to ENDIF */
5560       }
5561       break;
5562 
5563    case TGSI_OPCODE_ENDIF:
5564       /* pop CondMask */
5565       assert(mach->CondStackTop > 0);
5566       mach->CondMask = mach->CondStack[--mach->CondStackTop];
5567       UPDATE_EXEC_MASK(mach);
5568       break;
5569 
5570    case TGSI_OPCODE_END:
5571       /* make sure we end primitives which haven't
5572        * been explicitly emitted */
5573       conditional_emit_primitive(mach);
5574       /* halt execution */
5575       *pc = -1;
5576       break;
5577 
5578    case TGSI_OPCODE_PUSHA:
5579       assert (0);
5580       break;
5581 
5582    case TGSI_OPCODE_POPA:
5583       assert (0);
5584       break;
5585 
5586    case TGSI_OPCODE_CEIL:
5587       exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5588       break;
5589 
5590    case TGSI_OPCODE_I2F:
5591       exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
5592       break;
5593 
5594    case TGSI_OPCODE_NOT:
5595       exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5596       break;
5597 
5598    case TGSI_OPCODE_TRUNC:
5599       exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5600       break;
5601 
5602    case TGSI_OPCODE_SHL:
5603       exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5604       break;
5605 
5606    case TGSI_OPCODE_AND:
5607       exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5608       break;
5609 
5610    case TGSI_OPCODE_OR:
5611       exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5612       break;
5613 
5614    case TGSI_OPCODE_MOD:
5615       exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5616       break;
5617 
5618    case TGSI_OPCODE_XOR:
5619       exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5620       break;
5621 
5622    case TGSI_OPCODE_SAD:
5623       assert (0);
5624       break;
5625 
5626    case TGSI_OPCODE_TXF:
5627       exec_txf(mach, inst);
5628       break;
5629 
5630    case TGSI_OPCODE_TXQ:
5631       exec_txq(mach, inst);
5632       break;
5633 
5634    case TGSI_OPCODE_EMIT:
5635       emit_vertex(mach);
5636       break;
5637 
5638    case TGSI_OPCODE_ENDPRIM:
5639       emit_primitive(mach);
5640       break;
5641 
5642    case TGSI_OPCODE_BGNLOOP:
5643       /* push LoopMask and ContMasks */
5644       assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5645       assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5646       assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5647       assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5648 
5649       mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5650       mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5651       mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5652       mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5653       mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5654       break;
5655 
5656    case TGSI_OPCODE_ENDLOOP:
5657       /* Restore ContMask, but don't pop */
5658       assert(mach->ContStackTop > 0);
5659       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5660       UPDATE_EXEC_MASK(mach);
5661       if (mach->ExecMask) {
5662          /* repeat loop: jump to instruction just past BGNLOOP */
5663          assert(mach->LoopLabelStackTop > 0);
5664          *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5665       }
5666       else {
5667          /* exit loop: pop LoopMask */
5668          assert(mach->LoopStackTop > 0);
5669          mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5670          /* pop ContMask */
5671          assert(mach->ContStackTop > 0);
5672          mach->ContMask = mach->ContStack[--mach->ContStackTop];
5673          assert(mach->LoopLabelStackTop > 0);
5674          --mach->LoopLabelStackTop;
5675 
5676          mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5677       }
5678       UPDATE_EXEC_MASK(mach);
5679       break;
5680 
5681    case TGSI_OPCODE_BRK:
5682       exec_break(mach);
5683       break;
5684 
5685    case TGSI_OPCODE_CONT:
5686       /* turn off cont channels for each enabled exec channel */
5687       mach->ContMask &= ~mach->ExecMask;
5688       /* Todo: if mach->LoopMask == 0, jump to end of loop */
5689       UPDATE_EXEC_MASK(mach);
5690       break;
5691 
5692    case TGSI_OPCODE_BGNSUB:
5693       /* no-op */
5694       break;
5695 
5696    case TGSI_OPCODE_ENDSUB:
5697       /*
5698        * XXX: This really should be a no-op. We should never reach this opcode.
5699        */
5700 
5701       assert(mach->CallStackTop > 0);
5702       mach->CallStackTop--;
5703 
5704       mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5705       mach->CondMask = mach->CondStack[mach->CondStackTop];
5706 
5707       mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5708       mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5709 
5710       mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5711       mach->ContMask = mach->ContStack[mach->ContStackTop];
5712 
5713       mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5714       mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5715 
5716       mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5717       mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5718 
5719       assert(mach->FuncStackTop > 0);
5720       mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5721 
5722       *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5723 
5724       UPDATE_EXEC_MASK(mach);
5725       break;
5726 
5727    case TGSI_OPCODE_NOP:
5728       break;
5729 
5730    case TGSI_OPCODE_BREAKC:
5731       IFETCH(&r[0], 0, TGSI_CHAN_X);
5732       /* update CondMask */
5733       if (r[0].u[0] && (mach->ExecMask & 0x1)) {
5734          mach->LoopMask &= ~0x1;
5735       }
5736       if (r[0].u[1] && (mach->ExecMask & 0x2)) {
5737          mach->LoopMask &= ~0x2;
5738       }
5739       if (r[0].u[2] && (mach->ExecMask & 0x4)) {
5740          mach->LoopMask &= ~0x4;
5741       }
5742       if (r[0].u[3] && (mach->ExecMask & 0x8)) {
5743          mach->LoopMask &= ~0x8;
5744       }
5745       /* Todo: if mach->LoopMask == 0, jump to end of loop */
5746       UPDATE_EXEC_MASK(mach);
5747       break;
5748 
5749    case TGSI_OPCODE_F2I:
5750       exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5751       break;
5752 
5753    case TGSI_OPCODE_FSEQ:
5754       exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5755       break;
5756 
5757    case TGSI_OPCODE_FSGE:
5758       exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5759       break;
5760 
5761    case TGSI_OPCODE_FSLT:
5762       exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5763       break;
5764 
5765    case TGSI_OPCODE_FSNE:
5766       exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5767       break;
5768 
5769    case TGSI_OPCODE_IDIV:
5770       exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5771       break;
5772 
5773    case TGSI_OPCODE_IMAX:
5774       exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5775       break;
5776 
5777    case TGSI_OPCODE_IMIN:
5778       exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5779       break;
5780 
5781    case TGSI_OPCODE_INEG:
5782       exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5783       break;
5784 
5785    case TGSI_OPCODE_ISGE:
5786       exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5787       break;
5788 
5789    case TGSI_OPCODE_ISHR:
5790       exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5791       break;
5792 
5793    case TGSI_OPCODE_ISLT:
5794       exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5795       break;
5796 
5797    case TGSI_OPCODE_F2U:
5798       exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5799       break;
5800 
5801    case TGSI_OPCODE_U2F:
5802       exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
5803       break;
5804 
5805    case TGSI_OPCODE_UADD:
5806       exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5807       break;
5808 
5809    case TGSI_OPCODE_UDIV:
5810       exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5811       break;
5812 
5813    case TGSI_OPCODE_UMAD:
5814       exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5815       break;
5816 
5817    case TGSI_OPCODE_UMAX:
5818       exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5819       break;
5820 
5821    case TGSI_OPCODE_UMIN:
5822       exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5823       break;
5824 
5825    case TGSI_OPCODE_UMOD:
5826       exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5827       break;
5828 
5829    case TGSI_OPCODE_UMUL:
5830       exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5831       break;
5832 
5833    case TGSI_OPCODE_IMUL_HI:
5834       exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5835       break;
5836 
5837    case TGSI_OPCODE_UMUL_HI:
5838       exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5839       break;
5840 
5841    case TGSI_OPCODE_USEQ:
5842       exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5843       break;
5844 
5845    case TGSI_OPCODE_USGE:
5846       exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5847       break;
5848 
5849    case TGSI_OPCODE_USHR:
5850       exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5851       break;
5852 
5853    case TGSI_OPCODE_USLT:
5854       exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5855       break;
5856 
5857    case TGSI_OPCODE_USNE:
5858       exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5859       break;
5860 
5861    case TGSI_OPCODE_SWITCH:
5862       exec_switch(mach, inst);
5863       break;
5864 
5865    case TGSI_OPCODE_CASE:
5866       exec_case(mach, inst);
5867       break;
5868 
5869    case TGSI_OPCODE_DEFAULT:
5870       exec_default(mach);
5871       break;
5872 
5873    case TGSI_OPCODE_ENDSWITCH:
5874       exec_endswitch(mach);
5875       break;
5876 
5877    case TGSI_OPCODE_SAMPLE_I:
5878       exec_txf(mach, inst);
5879       break;
5880 
5881    case TGSI_OPCODE_SAMPLE_I_MS:
5882       exec_txf(mach, inst);
5883       break;
5884 
5885    case TGSI_OPCODE_SAMPLE:
5886       exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
5887       break;
5888 
5889    case TGSI_OPCODE_SAMPLE_B:
5890       exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
5891       break;
5892 
5893    case TGSI_OPCODE_SAMPLE_C:
5894       exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
5895       break;
5896 
5897    case TGSI_OPCODE_SAMPLE_C_LZ:
5898       exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
5899       break;
5900 
5901    case TGSI_OPCODE_SAMPLE_D:
5902       exec_sample_d(mach, inst);
5903       break;
5904 
5905    case TGSI_OPCODE_SAMPLE_L:
5906       exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
5907       break;
5908 
5909    case TGSI_OPCODE_GATHER4:
5910       assert(0);
5911       break;
5912 
5913    case TGSI_OPCODE_SVIEWINFO:
5914       exec_txq(mach, inst);
5915       break;
5916 
5917    case TGSI_OPCODE_SAMPLE_POS:
5918       assert(0);
5919       break;
5920 
5921    case TGSI_OPCODE_SAMPLE_INFO:
5922       assert(0);
5923       break;
5924 
5925    case TGSI_OPCODE_UARL:
5926       exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5927       break;
5928 
5929    case TGSI_OPCODE_UCMP:
5930       exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5931       break;
5932 
5933    case TGSI_OPCODE_IABS:
5934       exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5935       break;
5936 
5937    case TGSI_OPCODE_ISSG:
5938       exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5939       break;
5940 
5941    case TGSI_OPCODE_TEX2:
5942       /* simple texture lookup */
5943       /* src[0] = texcoord */
5944       /* src[1] = compare */
5945       /* src[2] = sampler unit */
5946       exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
5947       break;
5948    case TGSI_OPCODE_TXB2:
5949       /* simple texture lookup */
5950       /* src[0] = texcoord */
5951       /* src[1] = bias */
5952       /* src[2] = sampler unit */
5953       exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
5954       break;
5955    case TGSI_OPCODE_TXL2:
5956       /* simple texture lookup */
5957       /* src[0] = texcoord */
5958       /* src[1] = lod */
5959       /* src[2] = sampler unit */
5960       exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
5961       break;
5962 
5963    case TGSI_OPCODE_IBFE:
5964       exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5965       break;
5966    case TGSI_OPCODE_UBFE:
5967       exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5968       break;
5969    case TGSI_OPCODE_BFI:
5970       exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5971       break;
5972    case TGSI_OPCODE_BREV:
5973       exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5974       break;
5975    case TGSI_OPCODE_POPC:
5976       exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5977       break;
5978    case TGSI_OPCODE_LSB:
5979       exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5980       break;
5981    case TGSI_OPCODE_IMSB:
5982       exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5983       break;
5984    case TGSI_OPCODE_UMSB:
5985       exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5986       break;
5987 
5988    case TGSI_OPCODE_F2D:
5989       exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
5990       break;
5991 
5992    case TGSI_OPCODE_D2F:
5993       exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
5994       break;
5995 
5996    case TGSI_OPCODE_DABS:
5997       exec_double_unary(mach, inst, micro_dabs);
5998       break;
5999 
6000    case TGSI_OPCODE_DNEG:
6001       exec_double_unary(mach, inst, micro_dneg);
6002       break;
6003 
6004    case TGSI_OPCODE_DADD:
6005       exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
6006       break;
6007 
6008    case TGSI_OPCODE_DDIV:
6009       exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
6010       break;
6011 
6012    case TGSI_OPCODE_DMUL:
6013       exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
6014       break;
6015 
6016    case TGSI_OPCODE_DMAX:
6017       exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
6018       break;
6019 
6020    case TGSI_OPCODE_DMIN:
6021       exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
6022       break;
6023 
6024    case TGSI_OPCODE_DSLT:
6025       exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
6026       break;
6027 
6028    case TGSI_OPCODE_DSGE:
6029       exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
6030       break;
6031 
6032    case TGSI_OPCODE_DSEQ:
6033       exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
6034       break;
6035 
6036    case TGSI_OPCODE_DSNE:
6037       exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
6038       break;
6039 
6040    case TGSI_OPCODE_DRCP:
6041       exec_double_unary(mach, inst, micro_drcp);
6042       break;
6043 
6044    case TGSI_OPCODE_DSQRT:
6045       exec_double_unary(mach, inst, micro_dsqrt);
6046       break;
6047 
6048    case TGSI_OPCODE_DRSQ:
6049       exec_double_unary(mach, inst, micro_drsq);
6050       break;
6051 
6052    case TGSI_OPCODE_DMAD:
6053       exec_double_trinary(mach, inst, micro_dmad);
6054       break;
6055 
6056    case TGSI_OPCODE_DFRAC:
6057       exec_double_unary(mach, inst, micro_dfrac);
6058       break;
6059 
6060    case TGSI_OPCODE_DLDEXP:
6061       exec_dldexp(mach, inst);
6062       break;
6063 
6064    case TGSI_OPCODE_DFRACEXP:
6065       exec_dfracexp(mach, inst);
6066       break;
6067 
6068    case TGSI_OPCODE_I2D:
6069       exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
6070       break;
6071 
6072    case TGSI_OPCODE_D2I:
6073       exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
6074       break;
6075 
6076    case TGSI_OPCODE_U2D:
6077       exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
6078       break;
6079 
6080    case TGSI_OPCODE_D2U:
6081       exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT);
6082       break;
6083 
6084    case TGSI_OPCODE_LOAD:
6085       exec_load(mach, inst);
6086       break;
6087 
6088    case TGSI_OPCODE_STORE:
6089       exec_store(mach, inst);
6090       break;
6091 
6092    case TGSI_OPCODE_ATOMUADD:
6093    case TGSI_OPCODE_ATOMXCHG:
6094    case TGSI_OPCODE_ATOMCAS:
6095    case TGSI_OPCODE_ATOMAND:
6096    case TGSI_OPCODE_ATOMOR:
6097    case TGSI_OPCODE_ATOMXOR:
6098    case TGSI_OPCODE_ATOMUMIN:
6099    case TGSI_OPCODE_ATOMUMAX:
6100    case TGSI_OPCODE_ATOMIMIN:
6101    case TGSI_OPCODE_ATOMIMAX:
6102       exec_atomop(mach, inst);
6103       break;
6104 
6105    case TGSI_OPCODE_RESQ:
6106       exec_resq(mach, inst);
6107       break;
6108    case TGSI_OPCODE_BARRIER:
6109    case TGSI_OPCODE_MEMBAR:
6110       return TRUE;
6111       break;
6112 
6113    case TGSI_OPCODE_I64ABS:
6114       exec_double_unary(mach, inst, micro_i64abs);
6115       break;
6116 
6117    case TGSI_OPCODE_I64SSG:
6118       exec_double_unary(mach, inst, micro_i64sgn);
6119       break;
6120 
6121    case TGSI_OPCODE_I64NEG:
6122       exec_double_unary(mach, inst, micro_i64neg);
6123       break;
6124 
6125    case TGSI_OPCODE_U64SEQ:
6126       exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
6127       break;
6128 
6129    case TGSI_OPCODE_U64SNE:
6130       exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
6131       break;
6132 
6133    case TGSI_OPCODE_I64SLT:
6134       exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
6135       break;
6136    case TGSI_OPCODE_U64SLT:
6137       exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
6138       break;
6139 
6140    case TGSI_OPCODE_I64SGE:
6141       exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
6142       break;
6143    case TGSI_OPCODE_U64SGE:
6144       exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
6145       break;
6146 
6147    case TGSI_OPCODE_I64MIN:
6148       exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
6149       break;
6150    case TGSI_OPCODE_U64MIN:
6151       exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
6152       break;
6153    case TGSI_OPCODE_I64MAX:
6154       exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
6155       break;
6156    case TGSI_OPCODE_U64MAX:
6157       exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
6158       break;
6159    case TGSI_OPCODE_U64ADD:
6160       exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
6161       break;
6162    case TGSI_OPCODE_U64MUL:
6163       exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
6164       break;
6165    case TGSI_OPCODE_U64SHL:
6166       exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
6167       break;
6168    case TGSI_OPCODE_I64SHR:
6169       exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
6170       break;
6171    case TGSI_OPCODE_U64SHR:
6172       exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
6173       break;
6174    case TGSI_OPCODE_U64DIV:
6175       exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
6176       break;
6177    case TGSI_OPCODE_I64DIV:
6178       exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
6179       break;
6180    case TGSI_OPCODE_U64MOD:
6181       exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
6182       break;
6183    case TGSI_OPCODE_I64MOD:
6184       exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
6185       break;
6186 
6187    case TGSI_OPCODE_F2U64:
6188       exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
6189       break;
6190 
6191    case TGSI_OPCODE_F2I64:
6192       exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
6193       break;
6194 
6195    case TGSI_OPCODE_U2I64:
6196       exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
6197       break;
6198    case TGSI_OPCODE_I2I64:
6199       exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
6200       break;
6201 
6202    case TGSI_OPCODE_D2U64:
6203       exec_double_unary(mach, inst, micro_d2u64);
6204       break;
6205 
6206    case TGSI_OPCODE_D2I64:
6207       exec_double_unary(mach, inst, micro_d2i64);
6208       break;
6209 
6210    case TGSI_OPCODE_U642F:
6211       exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
6212       break;
6213    case TGSI_OPCODE_I642F:
6214       exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
6215       break;
6216 
6217    case TGSI_OPCODE_U642D:
6218       exec_double_unary(mach, inst, micro_u642d);
6219       break;
6220    case TGSI_OPCODE_I642D:
6221       exec_double_unary(mach, inst, micro_i642d);
6222       break;
6223 
6224    default:
6225       assert( 0 );
6226    }
6227    return FALSE;
6228 }
6229 
6230 static void
tgsi_exec_machine_setup_masks(struct tgsi_exec_machine * mach)6231 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
6232 {
6233    uint default_mask = 0xf;
6234 
6235    mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
6236    mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
6237 
6238    if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6239       mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
6240       mach->Primitives[0] = 0;
6241       /* GS runs on a single primitive for now */
6242       default_mask = 0x1;
6243    }
6244 
6245    if (mach->NonHelperMask == 0)
6246       mach->NonHelperMask = default_mask;
6247    mach->CondMask = default_mask;
6248    mach->LoopMask = default_mask;
6249    mach->ContMask = default_mask;
6250    mach->FuncMask = default_mask;
6251    mach->ExecMask = default_mask;
6252 
6253    mach->Switch.mask = default_mask;
6254 
6255    assert(mach->CondStackTop == 0);
6256    assert(mach->LoopStackTop == 0);
6257    assert(mach->ContStackTop == 0);
6258    assert(mach->SwitchStackTop == 0);
6259    assert(mach->BreakStackTop == 0);
6260    assert(mach->CallStackTop == 0);
6261 }
6262 
6263 /**
6264  * Run TGSI interpreter.
6265  * \return bitmask of "alive" quad components
6266  */
6267 uint
tgsi_exec_machine_run(struct tgsi_exec_machine * mach,int start_pc)6268 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
6269 {
6270    uint i;
6271 
6272    mach->pc = start_pc;
6273 
6274    if (!start_pc) {
6275       tgsi_exec_machine_setup_masks(mach);
6276 
6277       /* execute declarations (interpolants) */
6278       for (i = 0; i < mach->NumDeclarations; i++) {
6279          exec_declaration( mach, mach->Declarations+i );
6280       }
6281    }
6282 
6283    {
6284 #if DEBUG_EXECUTION
6285       struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
6286       struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6287       uint inst = 1;
6288 
6289       if (!start_pc) {
6290          memset(mach->Temps, 0, sizeof(temps));
6291          if (mach->Outputs)
6292             memset(mach->Outputs, 0, sizeof(outputs));
6293          memset(temps, 0, sizeof(temps));
6294          memset(outputs, 0, sizeof(outputs));
6295       }
6296 #endif
6297 
6298       /* execute instructions, until pc is set to -1 */
6299       while (mach->pc != -1) {
6300          boolean barrier_hit;
6301 #if DEBUG_EXECUTION
6302          uint i;
6303 
6304          tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6305 #endif
6306 
6307          assert(mach->pc < (int) mach->NumInstructions);
6308          barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6309 
6310          /* for compute shaders if we hit a barrier return now for later rescheduling */
6311          if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6312             return 0;
6313 
6314 #if DEBUG_EXECUTION
6315          for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
6316             if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6317                uint j;
6318 
6319                memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6320                debug_printf("TEMP[%2u] = ", i);
6321                for (j = 0; j < 4; j++) {
6322                   if (j > 0) {
6323                      debug_printf("           ");
6324                   }
6325                   debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6326                                temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6327                                temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6328                                temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6329                                temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6330                }
6331             }
6332          }
6333          if (mach->Outputs) {
6334             for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6335                if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6336                   uint j;
6337 
6338                   memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6339                   debug_printf("OUT[%2u] =  ", i);
6340                   for (j = 0; j < 4; j++) {
6341                      if (j > 0) {
6342                         debug_printf("           ");
6343                      }
6344                      debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6345                                   outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6346                                   outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6347                                   outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6348                                   outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6349                   }
6350                }
6351             }
6352          }
6353 #endif
6354       }
6355    }
6356 
6357 #if 0
6358    /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6359    if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6360       /*
6361        * Scale back depth component.
6362        */
6363       for (i = 0; i < 4; i++)
6364          mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6365    }
6366 #endif
6367 
6368    /* Strictly speaking, these assertions aren't really needed but they
6369     * can potentially catch some bugs in the control flow code.
6370     */
6371    assert(mach->CondStackTop == 0);
6372    assert(mach->LoopStackTop == 0);
6373    assert(mach->ContStackTop == 0);
6374    assert(mach->SwitchStackTop == 0);
6375    assert(mach->BreakStackTop == 0);
6376    assert(mach->CallStackTop == 0);
6377 
6378    return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
6379 }
6380