1 /**************************************************************************
2 *
3 * Copyright 2007-2008 VMware, Inc.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 * The ExecMask is computed from three other masks (CondMask, LoopMask and
 * ContMask) which are controlled by the flow control instructions (namely:
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/u_half.h"
62 #include "util/u_memory.h"
63 #include "util/u_math.h"
64 #include "util/rounding.h"
65
66
/* Debug switch. NOTE(review): not referenced in the part of the file
 * reviewed here; confirm where it is consumed. */
#define DEBUG_EXECUTION 0


/* When non-zero, micro_exp2() and micro_lg2() use the util_fast_*()
 * approximations instead of the libm-based code path. */
#define FAST_MATH 0

/* Channel indices used by micro_ddx()/micro_ddy() to pick the pixels of a
 * quad when forming the horizontal and vertical differences. */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
76
/**
 * One 64-bit value per quad element, viewable as double, as a pair of
 * 32-bit words, or as an unsigned/signed 64-bit integer.
 */
union tgsi_double_channel {
   double d[TGSI_QUAD_SIZE];       /* floating-point view */
   unsigned u[TGSI_QUAD_SIZE][2];  /* two 32-bit words per element; the
                                    * comparison helpers in this file store
                                    * their 32-bit result in word [0] */
   uint64_t u64[TGSI_QUAD_SIZE];   /* unsigned 64-bit view */
   int64_t i64[TGSI_QUAD_SIZE];    /* signed 64-bit view */
};
83
/**
 * A pair of 64-bit channels.
 * NOTE(review): the member names suggest that each 64-bit channel occupies
 * two 32-bit register components (x/y and z/w) -- confirm against the users
 * of this struct.
 */
struct tgsi_double_vector {
   union tgsi_double_channel xy;
   union tgsi_double_channel zw;
};
88
89 static void
micro_abs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)90 micro_abs(union tgsi_exec_channel *dst,
91 const union tgsi_exec_channel *src)
92 {
93 dst->f[0] = fabsf(src->f[0]);
94 dst->f[1] = fabsf(src->f[1]);
95 dst->f[2] = fabsf(src->f[2]);
96 dst->f[3] = fabsf(src->f[3]);
97 }
98
99 static void
micro_arl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)100 micro_arl(union tgsi_exec_channel *dst,
101 const union tgsi_exec_channel *src)
102 {
103 dst->i[0] = (int)floorf(src->f[0]);
104 dst->i[1] = (int)floorf(src->f[1]);
105 dst->i[2] = (int)floorf(src->f[2]);
106 dst->i[3] = (int)floorf(src->f[3]);
107 }
108
109 static void
micro_arr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)110 micro_arr(union tgsi_exec_channel *dst,
111 const union tgsi_exec_channel *src)
112 {
113 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
114 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
115 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
116 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
117 }
118
119 static void
micro_ceil(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)120 micro_ceil(union tgsi_exec_channel *dst,
121 const union tgsi_exec_channel *src)
122 {
123 dst->f[0] = ceilf(src->f[0]);
124 dst->f[1] = ceilf(src->f[1]);
125 dst->f[2] = ceilf(src->f[2]);
126 dst->f[3] = ceilf(src->f[3]);
127 }
128
129 static void
micro_clamp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)130 micro_clamp(union tgsi_exec_channel *dst,
131 const union tgsi_exec_channel *src0,
132 const union tgsi_exec_channel *src1,
133 const union tgsi_exec_channel *src2)
134 {
135 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0];
136 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1];
137 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2];
138 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3];
139 }
140
141 static void
micro_cmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)142 micro_cmp(union tgsi_exec_channel *dst,
143 const union tgsi_exec_channel *src0,
144 const union tgsi_exec_channel *src1,
145 const union tgsi_exec_channel *src2)
146 {
147 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
148 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
149 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
150 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
151 }
152
153 static void
micro_cos(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)154 micro_cos(union tgsi_exec_channel *dst,
155 const union tgsi_exec_channel *src)
156 {
157 dst->f[0] = cosf(src->f[0]);
158 dst->f[1] = cosf(src->f[1]);
159 dst->f[2] = cosf(src->f[2]);
160 dst->f[3] = cosf(src->f[3]);
161 }
162
163 static void
micro_d2f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)164 micro_d2f(union tgsi_exec_channel *dst,
165 const union tgsi_double_channel *src)
166 {
167 dst->f[0] = (float)src->d[0];
168 dst->f[1] = (float)src->d[1];
169 dst->f[2] = (float)src->d[2];
170 dst->f[3] = (float)src->d[3];
171 }
172
173 static void
micro_d2i(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)174 micro_d2i(union tgsi_exec_channel *dst,
175 const union tgsi_double_channel *src)
176 {
177 dst->i[0] = (int)src->d[0];
178 dst->i[1] = (int)src->d[1];
179 dst->i[2] = (int)src->d[2];
180 dst->i[3] = (int)src->d[3];
181 }
182
183 static void
micro_d2u(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)184 micro_d2u(union tgsi_exec_channel *dst,
185 const union tgsi_double_channel *src)
186 {
187 dst->u[0] = (unsigned)src->d[0];
188 dst->u[1] = (unsigned)src->d[1];
189 dst->u[2] = (unsigned)src->d[2];
190 dst->u[3] = (unsigned)src->d[3];
191 }
192 static void
micro_dabs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)193 micro_dabs(union tgsi_double_channel *dst,
194 const union tgsi_double_channel *src)
195 {
196 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
197 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
198 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
199 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
200 }
201
202 static void
micro_dadd(union tgsi_double_channel * dst,const union tgsi_double_channel * src)203 micro_dadd(union tgsi_double_channel *dst,
204 const union tgsi_double_channel *src)
205 {
206 dst->d[0] = src[0].d[0] + src[1].d[0];
207 dst->d[1] = src[0].d[1] + src[1].d[1];
208 dst->d[2] = src[0].d[2] + src[1].d[2];
209 dst->d[3] = src[0].d[3] + src[1].d[3];
210 }
211
212 static void
micro_ddiv(union tgsi_double_channel * dst,const union tgsi_double_channel * src)213 micro_ddiv(union tgsi_double_channel *dst,
214 const union tgsi_double_channel *src)
215 {
216 dst->d[0] = src[0].d[0] / src[1].d[0];
217 dst->d[1] = src[0].d[1] / src[1].d[1];
218 dst->d[2] = src[0].d[2] / src[1].d[2];
219 dst->d[3] = src[0].d[3] / src[1].d[3];
220 }
221
222 static void
micro_ddx(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)223 micro_ddx(union tgsi_exec_channel *dst,
224 const union tgsi_exec_channel *src)
225 {
226 dst->f[0] =
227 dst->f[1] =
228 dst->f[2] =
229 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
230 }
231
232 static void
micro_ddy(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)233 micro_ddy(union tgsi_exec_channel *dst,
234 const union tgsi_exec_channel *src)
235 {
236 dst->f[0] =
237 dst->f[1] =
238 dst->f[2] =
239 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
240 }
241
242 static void
micro_dmul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)243 micro_dmul(union tgsi_double_channel *dst,
244 const union tgsi_double_channel *src)
245 {
246 dst->d[0] = src[0].d[0] * src[1].d[0];
247 dst->d[1] = src[0].d[1] * src[1].d[1];
248 dst->d[2] = src[0].d[2] * src[1].d[2];
249 dst->d[3] = src[0].d[3] * src[1].d[3];
250 }
251
252 static void
micro_dmax(union tgsi_double_channel * dst,const union tgsi_double_channel * src)253 micro_dmax(union tgsi_double_channel *dst,
254 const union tgsi_double_channel *src)
255 {
256 dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0];
257 dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1];
258 dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2];
259 dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3];
260 }
261
262 static void
micro_dmin(union tgsi_double_channel * dst,const union tgsi_double_channel * src)263 micro_dmin(union tgsi_double_channel *dst,
264 const union tgsi_double_channel *src)
265 {
266 dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0];
267 dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1];
268 dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2];
269 dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3];
270 }
271
272 static void
micro_dneg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)273 micro_dneg(union tgsi_double_channel *dst,
274 const union tgsi_double_channel *src)
275 {
276 dst->d[0] = -src->d[0];
277 dst->d[1] = -src->d[1];
278 dst->d[2] = -src->d[2];
279 dst->d[3] = -src->d[3];
280 }
281
282 static void
micro_dslt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)283 micro_dslt(union tgsi_double_channel *dst,
284 const union tgsi_double_channel *src)
285 {
286 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
287 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
288 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
289 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
290 }
291
292 static void
micro_dsne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)293 micro_dsne(union tgsi_double_channel *dst,
294 const union tgsi_double_channel *src)
295 {
296 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
297 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
298 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
299 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
300 }
301
302 static void
micro_dsge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)303 micro_dsge(union tgsi_double_channel *dst,
304 const union tgsi_double_channel *src)
305 {
306 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
307 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
308 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
309 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
310 }
311
312 static void
micro_dseq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)313 micro_dseq(union tgsi_double_channel *dst,
314 const union tgsi_double_channel *src)
315 {
316 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
317 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
318 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
319 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
320 }
321
322 static void
micro_drcp(union tgsi_double_channel * dst,const union tgsi_double_channel * src)323 micro_drcp(union tgsi_double_channel *dst,
324 const union tgsi_double_channel *src)
325 {
326 dst->d[0] = 1.0 / src->d[0];
327 dst->d[1] = 1.0 / src->d[1];
328 dst->d[2] = 1.0 / src->d[2];
329 dst->d[3] = 1.0 / src->d[3];
330 }
331
332 static void
micro_dsqrt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)333 micro_dsqrt(union tgsi_double_channel *dst,
334 const union tgsi_double_channel *src)
335 {
336 dst->d[0] = sqrt(src->d[0]);
337 dst->d[1] = sqrt(src->d[1]);
338 dst->d[2] = sqrt(src->d[2]);
339 dst->d[3] = sqrt(src->d[3]);
340 }
341
342 static void
micro_drsq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)343 micro_drsq(union tgsi_double_channel *dst,
344 const union tgsi_double_channel *src)
345 {
346 dst->d[0] = 1.0 / sqrt(src->d[0]);
347 dst->d[1] = 1.0 / sqrt(src->d[1]);
348 dst->d[2] = 1.0 / sqrt(src->d[2]);
349 dst->d[3] = 1.0 / sqrt(src->d[3]);
350 }
351
352 static void
micro_dmad(union tgsi_double_channel * dst,const union tgsi_double_channel * src)353 micro_dmad(union tgsi_double_channel *dst,
354 const union tgsi_double_channel *src)
355 {
356 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
357 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
358 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
359 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
360 }
361
362 static void
micro_dfrac(union tgsi_double_channel * dst,const union tgsi_double_channel * src)363 micro_dfrac(union tgsi_double_channel *dst,
364 const union tgsi_double_channel *src)
365 {
366 dst->d[0] = src->d[0] - floor(src->d[0]);
367 dst->d[1] = src->d[1] - floor(src->d[1]);
368 dst->d[2] = src->d[2] - floor(src->d[2]);
369 dst->d[3] = src->d[3] - floor(src->d[3]);
370 }
371
372 static void
micro_dldexp(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)373 micro_dldexp(union tgsi_double_channel *dst,
374 const union tgsi_double_channel *src0,
375 union tgsi_exec_channel *src1)
376 {
377 dst->d[0] = ldexp(src0->d[0], src1->i[0]);
378 dst->d[1] = ldexp(src0->d[1], src1->i[1]);
379 dst->d[2] = ldexp(src0->d[2], src1->i[2]);
380 dst->d[3] = ldexp(src0->d[3], src1->i[3]);
381 }
382
383 static void
micro_dfracexp(union tgsi_double_channel * dst,union tgsi_exec_channel * dst_exp,const union tgsi_double_channel * src)384 micro_dfracexp(union tgsi_double_channel *dst,
385 union tgsi_exec_channel *dst_exp,
386 const union tgsi_double_channel *src)
387 {
388 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
389 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
390 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
391 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
392 }
393
394 static void
micro_exp2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)395 micro_exp2(union tgsi_exec_channel *dst,
396 const union tgsi_exec_channel *src)
397 {
398 #if FAST_MATH
399 dst->f[0] = util_fast_exp2(src->f[0]);
400 dst->f[1] = util_fast_exp2(src->f[1]);
401 dst->f[2] = util_fast_exp2(src->f[2]);
402 dst->f[3] = util_fast_exp2(src->f[3]);
403 #else
404 #if DEBUG
405 /* Inf is okay for this instruction, so clamp it to silence assertions. */
406 uint i;
407 union tgsi_exec_channel clamped;
408
409 for (i = 0; i < 4; i++) {
410 if (src->f[i] > 127.99999f) {
411 clamped.f[i] = 127.99999f;
412 } else if (src->f[i] < -126.99999f) {
413 clamped.f[i] = -126.99999f;
414 } else {
415 clamped.f[i] = src->f[i];
416 }
417 }
418 src = &clamped;
419 #endif /* DEBUG */
420
421 dst->f[0] = powf(2.0f, src->f[0]);
422 dst->f[1] = powf(2.0f, src->f[1]);
423 dst->f[2] = powf(2.0f, src->f[2]);
424 dst->f[3] = powf(2.0f, src->f[3]);
425 #endif /* FAST_MATH */
426 }
427
428 static void
micro_f2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)429 micro_f2d(union tgsi_double_channel *dst,
430 const union tgsi_exec_channel *src)
431 {
432 dst->d[0] = (double)src->f[0];
433 dst->d[1] = (double)src->f[1];
434 dst->d[2] = (double)src->f[2];
435 dst->d[3] = (double)src->f[3];
436 }
437
438 static void
micro_flr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)439 micro_flr(union tgsi_exec_channel *dst,
440 const union tgsi_exec_channel *src)
441 {
442 dst->f[0] = floorf(src->f[0]);
443 dst->f[1] = floorf(src->f[1]);
444 dst->f[2] = floorf(src->f[2]);
445 dst->f[3] = floorf(src->f[3]);
446 }
447
448 static void
micro_frc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)449 micro_frc(union tgsi_exec_channel *dst,
450 const union tgsi_exec_channel *src)
451 {
452 dst->f[0] = src->f[0] - floorf(src->f[0]);
453 dst->f[1] = src->f[1] - floorf(src->f[1]);
454 dst->f[2] = src->f[2] - floorf(src->f[2]);
455 dst->f[3] = src->f[3] - floorf(src->f[3]);
456 }
457
458 static void
micro_i2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)459 micro_i2d(union tgsi_double_channel *dst,
460 const union tgsi_exec_channel *src)
461 {
462 dst->d[0] = (double)src->i[0];
463 dst->d[1] = (double)src->i[1];
464 dst->d[2] = (double)src->i[2];
465 dst->d[3] = (double)src->i[3];
466 }
467
468 static void
micro_iabs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)469 micro_iabs(union tgsi_exec_channel *dst,
470 const union tgsi_exec_channel *src)
471 {
472 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
473 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
474 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
475 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
476 }
477
478 static void
micro_ineg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)479 micro_ineg(union tgsi_exec_channel *dst,
480 const union tgsi_exec_channel *src)
481 {
482 dst->i[0] = -src->i[0];
483 dst->i[1] = -src->i[1];
484 dst->i[2] = -src->i[2];
485 dst->i[3] = -src->i[3];
486 }
487
488 static void
micro_lg2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)489 micro_lg2(union tgsi_exec_channel *dst,
490 const union tgsi_exec_channel *src)
491 {
492 #if FAST_MATH
493 dst->f[0] = util_fast_log2(src->f[0]);
494 dst->f[1] = util_fast_log2(src->f[1]);
495 dst->f[2] = util_fast_log2(src->f[2]);
496 dst->f[3] = util_fast_log2(src->f[3]);
497 #else
498 dst->f[0] = logf(src->f[0]) * 1.442695f;
499 dst->f[1] = logf(src->f[1]) * 1.442695f;
500 dst->f[2] = logf(src->f[2]) * 1.442695f;
501 dst->f[3] = logf(src->f[3]) * 1.442695f;
502 #endif
503 }
504
505 static void
micro_lrp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)506 micro_lrp(union tgsi_exec_channel *dst,
507 const union tgsi_exec_channel *src0,
508 const union tgsi_exec_channel *src1,
509 const union tgsi_exec_channel *src2)
510 {
511 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
512 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
513 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
514 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
515 }
516
517 static void
micro_mad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)518 micro_mad(union tgsi_exec_channel *dst,
519 const union tgsi_exec_channel *src0,
520 const union tgsi_exec_channel *src1,
521 const union tgsi_exec_channel *src2)
522 {
523 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
524 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
525 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
526 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
527 }
528
529 static void
micro_mov(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)530 micro_mov(union tgsi_exec_channel *dst,
531 const union tgsi_exec_channel *src)
532 {
533 dst->u[0] = src->u[0];
534 dst->u[1] = src->u[1];
535 dst->u[2] = src->u[2];
536 dst->u[3] = src->u[3];
537 }
538
539 static void
micro_rcp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)540 micro_rcp(union tgsi_exec_channel *dst,
541 const union tgsi_exec_channel *src)
542 {
543 #if 0 /* for debugging */
544 assert(src->f[0] != 0.0f);
545 assert(src->f[1] != 0.0f);
546 assert(src->f[2] != 0.0f);
547 assert(src->f[3] != 0.0f);
548 #endif
549 dst->f[0] = 1.0f / src->f[0];
550 dst->f[1] = 1.0f / src->f[1];
551 dst->f[2] = 1.0f / src->f[2];
552 dst->f[3] = 1.0f / src->f[3];
553 }
554
555 static void
micro_rnd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)556 micro_rnd(union tgsi_exec_channel *dst,
557 const union tgsi_exec_channel *src)
558 {
559 dst->f[0] = _mesa_roundevenf(src->f[0]);
560 dst->f[1] = _mesa_roundevenf(src->f[1]);
561 dst->f[2] = _mesa_roundevenf(src->f[2]);
562 dst->f[3] = _mesa_roundevenf(src->f[3]);
563 }
564
565 static void
micro_rsq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)566 micro_rsq(union tgsi_exec_channel *dst,
567 const union tgsi_exec_channel *src)
568 {
569 #if 0 /* for debugging */
570 assert(src->f[0] != 0.0f);
571 assert(src->f[1] != 0.0f);
572 assert(src->f[2] != 0.0f);
573 assert(src->f[3] != 0.0f);
574 #endif
575 dst->f[0] = 1.0f / sqrtf(src->f[0]);
576 dst->f[1] = 1.0f / sqrtf(src->f[1]);
577 dst->f[2] = 1.0f / sqrtf(src->f[2]);
578 dst->f[3] = 1.0f / sqrtf(src->f[3]);
579 }
580
581 static void
micro_sqrt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)582 micro_sqrt(union tgsi_exec_channel *dst,
583 const union tgsi_exec_channel *src)
584 {
585 dst->f[0] = sqrtf(src->f[0]);
586 dst->f[1] = sqrtf(src->f[1]);
587 dst->f[2] = sqrtf(src->f[2]);
588 dst->f[3] = sqrtf(src->f[3]);
589 }
590
591 static void
micro_seq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)592 micro_seq(union tgsi_exec_channel *dst,
593 const union tgsi_exec_channel *src0,
594 const union tgsi_exec_channel *src1)
595 {
596 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
597 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
598 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
599 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
600 }
601
602 static void
micro_sge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)603 micro_sge(union tgsi_exec_channel *dst,
604 const union tgsi_exec_channel *src0,
605 const union tgsi_exec_channel *src1)
606 {
607 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
608 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
609 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
610 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
611 }
612
613 static void
micro_sgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)614 micro_sgn(union tgsi_exec_channel *dst,
615 const union tgsi_exec_channel *src)
616 {
617 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
618 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
619 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
620 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
621 }
622
623 static void
micro_isgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)624 micro_isgn(union tgsi_exec_channel *dst,
625 const union tgsi_exec_channel *src)
626 {
627 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
628 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
629 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
630 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
631 }
632
633 static void
micro_sgt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)634 micro_sgt(union tgsi_exec_channel *dst,
635 const union tgsi_exec_channel *src0,
636 const union tgsi_exec_channel *src1)
637 {
638 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
639 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
640 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
641 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
642 }
643
644 static void
micro_sin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)645 micro_sin(union tgsi_exec_channel *dst,
646 const union tgsi_exec_channel *src)
647 {
648 dst->f[0] = sinf(src->f[0]);
649 dst->f[1] = sinf(src->f[1]);
650 dst->f[2] = sinf(src->f[2]);
651 dst->f[3] = sinf(src->f[3]);
652 }
653
654 static void
micro_sle(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)655 micro_sle(union tgsi_exec_channel *dst,
656 const union tgsi_exec_channel *src0,
657 const union tgsi_exec_channel *src1)
658 {
659 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
660 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
661 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
662 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
663 }
664
665 static void
micro_slt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)666 micro_slt(union tgsi_exec_channel *dst,
667 const union tgsi_exec_channel *src0,
668 const union tgsi_exec_channel *src1)
669 {
670 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
671 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
672 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
673 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
674 }
675
676 static void
micro_sne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)677 micro_sne(union tgsi_exec_channel *dst,
678 const union tgsi_exec_channel *src0,
679 const union tgsi_exec_channel *src1)
680 {
681 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
682 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
683 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
684 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
685 }
686
687 static void
micro_trunc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)688 micro_trunc(union tgsi_exec_channel *dst,
689 const union tgsi_exec_channel *src)
690 {
691 dst->f[0] = truncf(src->f[0]);
692 dst->f[1] = truncf(src->f[1]);
693 dst->f[2] = truncf(src->f[2]);
694 dst->f[3] = truncf(src->f[3]);
695 }
696
697 static void
micro_u2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)698 micro_u2d(union tgsi_double_channel *dst,
699 const union tgsi_exec_channel *src)
700 {
701 dst->d[0] = (double)src->u[0];
702 dst->d[1] = (double)src->u[1];
703 dst->d[2] = (double)src->u[2];
704 dst->d[3] = (double)src->u[3];
705 }
706
707 static void
micro_i64abs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)708 micro_i64abs(union tgsi_double_channel *dst,
709 const union tgsi_double_channel *src)
710 {
711 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
712 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
713 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
714 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
715 }
716
717 static void
micro_i64sgn(union tgsi_double_channel * dst,const union tgsi_double_channel * src)718 micro_i64sgn(union tgsi_double_channel *dst,
719 const union tgsi_double_channel *src)
720 {
721 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
722 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
723 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
724 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
725 }
726
727 static void
micro_i64neg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)728 micro_i64neg(union tgsi_double_channel *dst,
729 const union tgsi_double_channel *src)
730 {
731 dst->i64[0] = -src->i64[0];
732 dst->i64[1] = -src->i64[1];
733 dst->i64[2] = -src->i64[2];
734 dst->i64[3] = -src->i64[3];
735 }
736
737 static void
micro_u64seq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)738 micro_u64seq(union tgsi_double_channel *dst,
739 const union tgsi_double_channel *src)
740 {
741 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
742 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
743 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
744 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
745 }
746
747 static void
micro_u64sne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)748 micro_u64sne(union tgsi_double_channel *dst,
749 const union tgsi_double_channel *src)
750 {
751 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
752 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
753 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
754 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
755 }
756
757 static void
micro_i64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)758 micro_i64slt(union tgsi_double_channel *dst,
759 const union tgsi_double_channel *src)
760 {
761 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
762 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
763 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
764 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
765 }
766
767 static void
micro_u64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)768 micro_u64slt(union tgsi_double_channel *dst,
769 const union tgsi_double_channel *src)
770 {
771 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
772 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
773 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
774 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
775 }
776
777 static void
micro_i64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)778 micro_i64sge(union tgsi_double_channel *dst,
779 const union tgsi_double_channel *src)
780 {
781 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
782 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
783 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
784 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
785 }
786
787 static void
micro_u64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)788 micro_u64sge(union tgsi_double_channel *dst,
789 const union tgsi_double_channel *src)
790 {
791 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
792 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
793 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
794 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
795 }
796
797 static void
micro_u64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)798 micro_u64max(union tgsi_double_channel *dst,
799 const union tgsi_double_channel *src)
800 {
801 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
802 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
803 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
804 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
805 }
806
807 static void
micro_i64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)808 micro_i64max(union tgsi_double_channel *dst,
809 const union tgsi_double_channel *src)
810 {
811 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
812 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
813 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
814 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
815 }
816
817 static void
micro_u64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)818 micro_u64min(union tgsi_double_channel *dst,
819 const union tgsi_double_channel *src)
820 {
821 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
822 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
823 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
824 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
825 }
826
827 static void
micro_i64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)828 micro_i64min(union tgsi_double_channel *dst,
829 const union tgsi_double_channel *src)
830 {
831 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
832 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
833 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
834 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
835 }
836
837 static void
micro_u64add(union tgsi_double_channel * dst,const union tgsi_double_channel * src)838 micro_u64add(union tgsi_double_channel *dst,
839 const union tgsi_double_channel *src)
840 {
841 dst->u64[0] = src[0].u64[0] + src[1].u64[0];
842 dst->u64[1] = src[0].u64[1] + src[1].u64[1];
843 dst->u64[2] = src[0].u64[2] + src[1].u64[2];
844 dst->u64[3] = src[0].u64[3] + src[1].u64[3];
845 }
846
847 static void
micro_u64mul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)848 micro_u64mul(union tgsi_double_channel *dst,
849 const union tgsi_double_channel *src)
850 {
851 dst->u64[0] = src[0].u64[0] * src[1].u64[0];
852 dst->u64[1] = src[0].u64[1] * src[1].u64[1];
853 dst->u64[2] = src[0].u64[2] * src[1].u64[2];
854 dst->u64[3] = src[0].u64[3] * src[1].u64[3];
855 }
856
857 static void
micro_u64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)858 micro_u64div(union tgsi_double_channel *dst,
859 const union tgsi_double_channel *src)
860 {
861 dst->u64[0] = src[0].u64[0] / src[1].u64[0];
862 dst->u64[1] = src[0].u64[1] / src[1].u64[1];
863 dst->u64[2] = src[0].u64[2] / src[1].u64[2];
864 dst->u64[3] = src[0].u64[3] / src[1].u64[3];
865 }
866
867 static void
micro_i64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)868 micro_i64div(union tgsi_double_channel *dst,
869 const union tgsi_double_channel *src)
870 {
871 dst->i64[0] = src[0].i64[0] / src[1].i64[0];
872 dst->i64[1] = src[0].i64[1] / src[1].i64[1];
873 dst->i64[2] = src[0].i64[2] / src[1].i64[2];
874 dst->i64[3] = src[0].i64[3] / src[1].i64[3];
875 }
876
877 static void
micro_u64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)878 micro_u64mod(union tgsi_double_channel *dst,
879 const union tgsi_double_channel *src)
880 {
881 dst->u64[0] = src[0].u64[0] % src[1].u64[0];
882 dst->u64[1] = src[0].u64[1] % src[1].u64[1];
883 dst->u64[2] = src[0].u64[2] % src[1].u64[2];
884 dst->u64[3] = src[0].u64[3] % src[1].u64[3];
885 }
886
887 static void
micro_i64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)888 micro_i64mod(union tgsi_double_channel *dst,
889 const union tgsi_double_channel *src)
890 {
891 dst->i64[0] = src[0].i64[0] % src[1].i64[0];
892 dst->i64[1] = src[0].i64[1] % src[1].i64[1];
893 dst->i64[2] = src[0].i64[2] % src[1].i64[2];
894 dst->i64[3] = src[0].i64[3] % src[1].i64[3];
895 }
896
897 static void
micro_u64shl(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)898 micro_u64shl(union tgsi_double_channel *dst,
899 const union tgsi_double_channel *src0,
900 union tgsi_exec_channel *src1)
901 {
902 unsigned masked_count;
903 masked_count = src1->u[0] & 0x3f;
904 dst->u64[0] = src0->u64[0] << masked_count;
905 masked_count = src1->u[1] & 0x3f;
906 dst->u64[1] = src0->u64[1] << masked_count;
907 masked_count = src1->u[2] & 0x3f;
908 dst->u64[2] = src0->u64[2] << masked_count;
909 masked_count = src1->u[3] & 0x3f;
910 dst->u64[3] = src0->u64[3] << masked_count;
911 }
912
913 static void
micro_i64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)914 micro_i64shr(union tgsi_double_channel *dst,
915 const union tgsi_double_channel *src0,
916 union tgsi_exec_channel *src1)
917 {
918 unsigned masked_count;
919 masked_count = src1->u[0] & 0x3f;
920 dst->i64[0] = src0->i64[0] >> masked_count;
921 masked_count = src1->u[1] & 0x3f;
922 dst->i64[1] = src0->i64[1] >> masked_count;
923 masked_count = src1->u[2] & 0x3f;
924 dst->i64[2] = src0->i64[2] >> masked_count;
925 masked_count = src1->u[3] & 0x3f;
926 dst->i64[3] = src0->i64[3] >> masked_count;
927 }
928
929 static void
micro_u64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)930 micro_u64shr(union tgsi_double_channel *dst,
931 const union tgsi_double_channel *src0,
932 union tgsi_exec_channel *src1)
933 {
934 unsigned masked_count;
935 masked_count = src1->u[0] & 0x3f;
936 dst->u64[0] = src0->u64[0] >> masked_count;
937 masked_count = src1->u[1] & 0x3f;
938 dst->u64[1] = src0->u64[1] >> masked_count;
939 masked_count = src1->u[2] & 0x3f;
940 dst->u64[2] = src0->u64[2] >> masked_count;
941 masked_count = src1->u[3] & 0x3f;
942 dst->u64[3] = src0->u64[3] >> masked_count;
943 }
944
/**
 * Data type tag handed to the operand fetch/store helpers.
 *
 * fetch_source() uses it to decide whether the absolute-value and negate
 * source modifiers are applied with the float or the integer micro-ops.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT  = 0,
   TGSI_EXEC_DATA_INT    = 1,
   TGSI_EXEC_DATA_UINT   = 2,
   TGSI_EXEC_DATA_DOUBLE = 3,
   TGSI_EXEC_DATA_INT64  = 4,
   TGSI_EXEC_DATA_UINT64 = 5
};
953
/*
 * Short aliases for the locations of the interpreter's utility registers.
 * The suffix tells which coordinate is named: _I is the register index,
 * _C is the channel within that register.
 */
#define TEMP_KILMASK_I     TGSI_EXEC_TEMP_KILMASK_I
#define TEMP_KILMASK_C     TGSI_EXEC_TEMP_KILMASK_C

#define TEMP_OUTPUT_I      TGSI_EXEC_TEMP_OUTPUT_I
#define TEMP_OUTPUT_C      TGSI_EXEC_TEMP_OUTPUT_C

#define TEMP_PRIMITIVE_I   TGSI_EXEC_TEMP_PRIMITIVE_I
#define TEMP_PRIMITIVE_C   TGSI_EXEC_TEMP_PRIMITIVE_C
963
964
/**
 * Recompute MACH->ExecMask as the intersection of the conditional, loop,
 * continue, switch and function-call masks.
 *
 * The argument and the whole expansion are parenthesized so that the macro
 * behaves as a single expression and accepts any pointer expression
 * (e.g. "p + 1").  Note that MACH is evaluated several times, so it must
 * not have side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
968
969
970 static const union tgsi_exec_channel ZeroVec =
971 { { 0.0, 0.0, 0.0, 0.0 } };
972
973 static const union tgsi_exec_channel OneVec = {
974 {1.0f, 1.0f, 1.0f, 1.0f}
975 };
976
977 static const union tgsi_exec_channel P128Vec = {
978 {128.0f, 128.0f, 128.0f, 128.0f}
979 };
980
981 static const union tgsi_exec_channel M128Vec = {
982 {-128.0f, -128.0f, -128.0f, -128.0f}
983 };
984
985
986 /**
987 * Assert that none of the float values in 'chan' are infinite or NaN.
988 * NaN and Inf may occur normally during program execution and should
989 * not lead to crashes, etc. But when debugging, it's helpful to catch
990 * them.
991 */
992 static inline void
check_inf_or_nan(const union tgsi_exec_channel * chan)993 check_inf_or_nan(const union tgsi_exec_channel *chan)
994 {
995 assert(!util_is_inf_or_nan((chan)->f[0]));
996 assert(!util_is_inf_or_nan((chan)->f[1]));
997 assert(!util_is_inf_or_nan((chan)->f[2]));
998 assert(!util_is_inf_or_nan((chan)->f[3]));
999 }
1000
1001
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of 'chan', labelled with 'msg'.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
1010
1011
#ifdef DEBUG
/**
 * Debug helper: dump temporary register 'index' of 'mach', one line per
 * X/Y/Z/W channel showing the four float lanes of that channel.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char channel_names[] = "XYZW";
   const struct tgsi_exec_vector *tmp = &mach->Temps[index];
   int c;

   debug_printf("Temp[%u] =\n", index);

   for (c = 0; c < 4; c++) {
      const float *lanes = tmp->xyzw[c].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   channel_names[c],
                   lanes[0], lanes[1], lanes[2], lanes[3]);
   }
}
#endif
1029
1030
1031 void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine * mach,unsigned num_bufs,const void ** bufs,const unsigned * buf_sizes)1032 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1033 unsigned num_bufs,
1034 const void **bufs,
1035 const unsigned *buf_sizes)
1036 {
1037 unsigned i;
1038
1039 for (i = 0; i < num_bufs; i++) {
1040 mach->Consts[i] = bufs[i];
1041 mach->ConstsSize[i] = buf_sizes[i];
1042 }
1043 }
1044
1045
1046 /**
1047 * Check if there's a potential src/dst register data dependency when
1048 * using SOA execution.
1049 * Example:
1050 * MOV T, T.yxwz;
1051 * This would expand into:
1052 * MOV t0, t1;
1053 * MOV t1, t0;
1054 * MOV t2, t3;
1055 * MOV t3, t2;
1056 * The second instruction will have the wrong value for t0 if executed as-is.
1057 */
1058 boolean
tgsi_check_soa_dependencies(const struct tgsi_full_instruction * inst)1059 tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
1060 {
1061 uint i, chan;
1062
1063 uint writemask = inst->Dst[0].Register.WriteMask;
1064 if (writemask == TGSI_WRITEMASK_X ||
1065 writemask == TGSI_WRITEMASK_Y ||
1066 writemask == TGSI_WRITEMASK_Z ||
1067 writemask == TGSI_WRITEMASK_W ||
1068 writemask == TGSI_WRITEMASK_NONE) {
1069 /* no chance of data dependency */
1070 return FALSE;
1071 }
1072
1073 /* loop over src regs */
1074 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1075 if ((inst->Src[i].Register.File ==
1076 inst->Dst[0].Register.File) &&
1077 ((inst->Src[i].Register.Index ==
1078 inst->Dst[0].Register.Index) ||
1079 inst->Src[i].Register.Indirect ||
1080 inst->Dst[0].Register.Indirect)) {
1081 /* loop over dest channels */
1082 uint channelsWritten = 0x0;
1083 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
1084 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
1085 /* check if we're reading a channel that's been written */
1086 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan);
1087 if (channelsWritten & (1 << swizzle)) {
1088 return TRUE;
1089 }
1090
1091 channelsWritten |= (1 << chan);
1092 }
1093 }
1094 }
1095 }
1096 return FALSE;
1097 }
1098
1099
1100 /**
1101 * Initialize machine state by expanding tokens to full instructions,
1102 * allocating temporary storage, setting up constants, etc.
1103 * After this, we can call tgsi_exec_machine_run() many times.
1104 */
1105 void
tgsi_exec_machine_bind_shader(struct tgsi_exec_machine * mach,const struct tgsi_token * tokens,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)1106 tgsi_exec_machine_bind_shader(
1107 struct tgsi_exec_machine *mach,
1108 const struct tgsi_token *tokens,
1109 struct tgsi_sampler *sampler,
1110 struct tgsi_image *image,
1111 struct tgsi_buffer *buffer)
1112 {
1113 uint k;
1114 struct tgsi_parse_context parse;
1115 struct tgsi_full_instruction *instructions;
1116 struct tgsi_full_declaration *declarations;
1117 uint maxInstructions = 10, numInstructions = 0;
1118 uint maxDeclarations = 10, numDeclarations = 0;
1119
1120 #if 0
1121 tgsi_dump(tokens, 0);
1122 #endif
1123
1124 util_init_math();
1125
1126
1127 mach->Tokens = tokens;
1128 mach->Sampler = sampler;
1129 mach->Image = image;
1130 mach->Buffer = buffer;
1131
1132 if (!tokens) {
1133 /* unbind and free all */
1134 FREE(mach->Declarations);
1135 mach->Declarations = NULL;
1136 mach->NumDeclarations = 0;
1137
1138 FREE(mach->Instructions);
1139 mach->Instructions = NULL;
1140 mach->NumInstructions = 0;
1141
1142 return;
1143 }
1144
1145 k = tgsi_parse_init (&parse, mach->Tokens);
1146 if (k != TGSI_PARSE_OK) {
1147 debug_printf( "Problem parsing!\n" );
1148 return;
1149 }
1150
1151 mach->ImmLimit = 0;
1152 mach->NumOutputs = 0;
1153
1154 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1155 mach->SysSemanticToIndex[k] = -1;
1156
1157 if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1158 !mach->UsedGeometryShader) {
1159 struct tgsi_exec_vector *inputs;
1160 struct tgsi_exec_vector *outputs;
1161
1162 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1163 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1164 16);
1165
1166 if (!inputs)
1167 return;
1168
1169 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1170 TGSI_MAX_TOTAL_VERTICES, 16);
1171
1172 if (!outputs) {
1173 align_free(inputs);
1174 return;
1175 }
1176
1177 align_free(mach->Inputs);
1178 align_free(mach->Outputs);
1179
1180 mach->Inputs = inputs;
1181 mach->Outputs = outputs;
1182 mach->UsedGeometryShader = TRUE;
1183 }
1184
1185 declarations = (struct tgsi_full_declaration *)
1186 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1187
1188 if (!declarations) {
1189 return;
1190 }
1191
1192 instructions = (struct tgsi_full_instruction *)
1193 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1194
1195 if (!instructions) {
1196 FREE( declarations );
1197 return;
1198 }
1199
1200 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1201 uint i;
1202
1203 tgsi_parse_token( &parse );
1204 switch( parse.FullToken.Token.Type ) {
1205 case TGSI_TOKEN_TYPE_DECLARATION:
1206 /* save expanded declaration */
1207 if (numDeclarations == maxDeclarations) {
1208 declarations = REALLOC(declarations,
1209 maxDeclarations
1210 * sizeof(struct tgsi_full_declaration),
1211 (maxDeclarations + 10)
1212 * sizeof(struct tgsi_full_declaration));
1213 maxDeclarations += 10;
1214 }
1215 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) {
1216 unsigned reg;
1217 for (reg = parse.FullToken.FullDeclaration.Range.First;
1218 reg <= parse.FullToken.FullDeclaration.Range.Last;
1219 ++reg) {
1220 ++mach->NumOutputs;
1221 }
1222 }
1223 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1224 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1225 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1226 }
1227
1228 memcpy(declarations + numDeclarations,
1229 &parse.FullToken.FullDeclaration,
1230 sizeof(declarations[0]));
1231 numDeclarations++;
1232 break;
1233
1234 case TGSI_TOKEN_TYPE_IMMEDIATE:
1235 {
1236 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1237 assert( size <= 4 );
1238 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
1239
1240 for( i = 0; i < size; i++ ) {
1241 mach->Imms[mach->ImmLimit][i] =
1242 parse.FullToken.FullImmediate.u[i].Float;
1243 }
1244 mach->ImmLimit += 1;
1245 }
1246 break;
1247
1248 case TGSI_TOKEN_TYPE_INSTRUCTION:
1249
1250 /* save expanded instruction */
1251 if (numInstructions == maxInstructions) {
1252 instructions = REALLOC(instructions,
1253 maxInstructions
1254 * sizeof(struct tgsi_full_instruction),
1255 (maxInstructions + 10)
1256 * sizeof(struct tgsi_full_instruction));
1257 maxInstructions += 10;
1258 }
1259
1260 memcpy(instructions + numInstructions,
1261 &parse.FullToken.FullInstruction,
1262 sizeof(instructions[0]));
1263
1264 numInstructions++;
1265 break;
1266
1267 case TGSI_TOKEN_TYPE_PROPERTY:
1268 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1269 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1270 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1271 }
1272 }
1273 break;
1274
1275 default:
1276 assert( 0 );
1277 }
1278 }
1279 tgsi_parse_free (&parse);
1280
1281 FREE(mach->Declarations);
1282 mach->Declarations = declarations;
1283 mach->NumDeclarations = numDeclarations;
1284
1285 FREE(mach->Instructions);
1286 mach->Instructions = instructions;
1287 mach->NumInstructions = numInstructions;
1288 }
1289
1290
1291 struct tgsi_exec_machine *
tgsi_exec_machine_create(enum pipe_shader_type shader_type)1292 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1293 {
1294 struct tgsi_exec_machine *mach;
1295 uint i;
1296
1297 mach = align_malloc( sizeof *mach, 16 );
1298 if (!mach)
1299 goto fail;
1300
1301 memset(mach, 0, sizeof(*mach));
1302
1303 mach->ShaderType = shader_type;
1304 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
1305 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
1306 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
1307
1308 if (shader_type != PIPE_SHADER_COMPUTE) {
1309 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1310 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1311 if (!mach->Inputs || !mach->Outputs)
1312 goto fail;
1313 }
1314
1315 /* Setup constants needed by the SSE2 executor. */
1316 for( i = 0; i < 4; i++ ) {
1317 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000;
1318 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF;
1319 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000;
1320 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */
1321 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f;
1322 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */
1323 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f;
1324 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f;
1325 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f;
1326 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f;
1327 }
1328
1329 #ifdef DEBUG
1330 /* silence warnings */
1331 (void) print_chan;
1332 (void) print_temp;
1333 #endif
1334
1335 return mach;
1336
1337 fail:
1338 if (mach) {
1339 align_free(mach->Inputs);
1340 align_free(mach->Outputs);
1341 align_free(mach);
1342 }
1343 return NULL;
1344 }
1345
1346
1347 void
tgsi_exec_machine_destroy(struct tgsi_exec_machine * mach)1348 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1349 {
1350 if (mach) {
1351 FREE(mach->Instructions);
1352 FREE(mach->Declarations);
1353
1354 align_free(mach->Inputs);
1355 align_free(mach->Outputs);
1356
1357 align_free(mach);
1358 }
1359 }
1360
1361 static void
micro_add(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1362 micro_add(union tgsi_exec_channel *dst,
1363 const union tgsi_exec_channel *src0,
1364 const union tgsi_exec_channel *src1)
1365 {
1366 dst->f[0] = src0->f[0] + src1->f[0];
1367 dst->f[1] = src0->f[1] + src1->f[1];
1368 dst->f[2] = src0->f[2] + src1->f[2];
1369 dst->f[3] = src0->f[3] + src1->f[3];
1370 }
1371
1372 static void
micro_div(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1373 micro_div(
1374 union tgsi_exec_channel *dst,
1375 const union tgsi_exec_channel *src0,
1376 const union tgsi_exec_channel *src1 )
1377 {
1378 if (src1->f[0] != 0) {
1379 dst->f[0] = src0->f[0] / src1->f[0];
1380 }
1381 if (src1->f[1] != 0) {
1382 dst->f[1] = src0->f[1] / src1->f[1];
1383 }
1384 if (src1->f[2] != 0) {
1385 dst->f[2] = src0->f[2] / src1->f[2];
1386 }
1387 if (src1->f[3] != 0) {
1388 dst->f[3] = src0->f[3] / src1->f[3];
1389 }
1390 }
1391
1392 static void
micro_lt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)1393 micro_lt(
1394 union tgsi_exec_channel *dst,
1395 const union tgsi_exec_channel *src0,
1396 const union tgsi_exec_channel *src1,
1397 const union tgsi_exec_channel *src2,
1398 const union tgsi_exec_channel *src3 )
1399 {
1400 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1401 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1402 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1403 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1404 }
1405
1406 static void
micro_max(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1407 micro_max(union tgsi_exec_channel *dst,
1408 const union tgsi_exec_channel *src0,
1409 const union tgsi_exec_channel *src1)
1410 {
1411 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
1412 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
1413 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
1414 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
1415 }
1416
1417 static void
micro_min(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1418 micro_min(union tgsi_exec_channel *dst,
1419 const union tgsi_exec_channel *src0,
1420 const union tgsi_exec_channel *src1)
1421 {
1422 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
1423 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
1424 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
1425 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
1426 }
1427
1428 static void
micro_mul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1429 micro_mul(union tgsi_exec_channel *dst,
1430 const union tgsi_exec_channel *src0,
1431 const union tgsi_exec_channel *src1)
1432 {
1433 dst->f[0] = src0->f[0] * src1->f[0];
1434 dst->f[1] = src0->f[1] * src1->f[1];
1435 dst->f[2] = src0->f[2] * src1->f[2];
1436 dst->f[3] = src0->f[3] * src1->f[3];
1437 }
1438
1439 static void
micro_neg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)1440 micro_neg(
1441 union tgsi_exec_channel *dst,
1442 const union tgsi_exec_channel *src )
1443 {
1444 dst->f[0] = -src->f[0];
1445 dst->f[1] = -src->f[1];
1446 dst->f[2] = -src->f[2];
1447 dst->f[3] = -src->f[3];
1448 }
1449
1450 static void
micro_pow(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1451 micro_pow(
1452 union tgsi_exec_channel *dst,
1453 const union tgsi_exec_channel *src0,
1454 const union tgsi_exec_channel *src1 )
1455 {
1456 #if FAST_MATH
1457 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
1458 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
1459 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
1460 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
1461 #else
1462 dst->f[0] = powf( src0->f[0], src1->f[0] );
1463 dst->f[1] = powf( src0->f[1], src1->f[1] );
1464 dst->f[2] = powf( src0->f[2], src1->f[2] );
1465 dst->f[3] = powf( src0->f[3], src1->f[3] );
1466 #endif
1467 }
1468
1469 static void
micro_sub(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1470 micro_sub(union tgsi_exec_channel *dst,
1471 const union tgsi_exec_channel *src0,
1472 const union tgsi_exec_channel *src1)
1473 {
1474 dst->f[0] = src0->f[0] - src1->f[0];
1475 dst->f[1] = src0->f[1] - src1->f[1];
1476 dst->f[2] = src0->f[2] - src1->f[2];
1477 dst->f[3] = src0->f[3] - src1->f[3];
1478 }
1479
1480 static void
fetch_src_file_channel(const struct tgsi_exec_machine * mach,const uint chan_index,const uint file,const uint swizzle,const union tgsi_exec_channel * index,const union tgsi_exec_channel * index2D,union tgsi_exec_channel * chan)1481 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1482 const uint chan_index,
1483 const uint file,
1484 const uint swizzle,
1485 const union tgsi_exec_channel *index,
1486 const union tgsi_exec_channel *index2D,
1487 union tgsi_exec_channel *chan)
1488 {
1489 uint i;
1490
1491 assert(swizzle < 4);
1492
1493 switch (file) {
1494 case TGSI_FILE_CONSTANT:
1495 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1496 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
1497 assert(mach->Consts[index2D->i[i]]);
1498
1499 if (index->i[i] < 0) {
1500 chan->u[i] = 0;
1501 } else {
1502 /* NOTE: copying the const value as a uint instead of float */
1503 const uint constbuf = index2D->i[i];
1504 const uint *buf = (const uint *)mach->Consts[constbuf];
1505 const int pos = index->i[i] * 4 + swizzle;
1506 /* const buffer bounds check */
1507 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
1508 if (0) {
1509 /* Debug: print warning */
1510 static int count = 0;
1511 if (count++ < 100)
1512 debug_printf("TGSI Exec: const buffer index %d"
1513 " out of bounds\n", pos);
1514 }
1515 chan->u[i] = 0;
1516 }
1517 else
1518 chan->u[i] = buf[pos];
1519 }
1520 }
1521 break;
1522
1523 case TGSI_FILE_INPUT:
1524 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1525 /*
1526 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1527 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1528 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1529 index2D->i[i], index->i[i]);
1530 }*/
1531 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1532 assert(pos >= 0);
1533 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1534 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1535 }
1536 break;
1537
1538 case TGSI_FILE_SYSTEM_VALUE:
1539 /* XXX no swizzling at this point. Will be needed if we put
1540 * gl_FragCoord, for example, in a sys value register.
1541 */
1542 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1543 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1544 }
1545 break;
1546
1547 case TGSI_FILE_TEMPORARY:
1548 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1549 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1550 assert(index2D->i[i] == 0);
1551
1552 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1553 }
1554 break;
1555
1556 case TGSI_FILE_IMMEDIATE:
1557 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1558 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1559 assert(index2D->i[i] == 0);
1560
1561 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1562 }
1563 break;
1564
1565 case TGSI_FILE_ADDRESS:
1566 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1567 assert(index->i[i] >= 0);
1568 assert(index2D->i[i] == 0);
1569
1570 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1571 }
1572 break;
1573
1574 case TGSI_FILE_PREDICATE:
1575 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1576 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS);
1577 assert(index2D->i[i] == 0);
1578
1579 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i];
1580 }
1581 break;
1582
1583 case TGSI_FILE_OUTPUT:
1584 /* vertex/fragment output vars can be read too */
1585 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1586 assert(index->i[i] >= 0);
1587 assert(index2D->i[i] == 0);
1588
1589 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1590 }
1591 break;
1592
1593 default:
1594 assert(0);
1595 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1596 chan->u[i] = 0;
1597 }
1598 }
1599 }
1600
1601 static void
fetch_source_d(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index,enum tgsi_exec_datatype src_datatype)1602 fetch_source_d(const struct tgsi_exec_machine *mach,
1603 union tgsi_exec_channel *chan,
1604 const struct tgsi_full_src_register *reg,
1605 const uint chan_index,
1606 enum tgsi_exec_datatype src_datatype)
1607 {
1608 union tgsi_exec_channel index;
1609 union tgsi_exec_channel index2D;
1610 uint swizzle;
1611
1612 /* We start with a direct index into a register file.
1613 *
1614 * file[1],
1615 * where:
1616 * file = Register.File
1617 * [1] = Register.Index
1618 */
1619 index.i[0] =
1620 index.i[1] =
1621 index.i[2] =
1622 index.i[3] = reg->Register.Index;
1623
1624 /* There is an extra source register that indirectly subscripts
1625 * a register file. The direct index now becomes an offset
1626 * that is being added to the indirect register.
1627 *
1628 * file[ind[2].x+1],
1629 * where:
1630 * ind = Indirect.File
1631 * [2] = Indirect.Index
1632 * .x = Indirect.SwizzleX
1633 */
1634 if (reg->Register.Indirect) {
1635 union tgsi_exec_channel index2;
1636 union tgsi_exec_channel indir_index;
1637 const uint execmask = mach->ExecMask;
1638 uint i;
1639
1640 /* which address register (always zero now) */
1641 index2.i[0] =
1642 index2.i[1] =
1643 index2.i[2] =
1644 index2.i[3] = reg->Indirect.Index;
1645 /* get current value of address register[swizzle] */
1646 swizzle = reg->Indirect.Swizzle;
1647 fetch_src_file_channel(mach,
1648 chan_index,
1649 reg->Indirect.File,
1650 swizzle,
1651 &index2,
1652 &ZeroVec,
1653 &indir_index);
1654
1655 /* add value of address register to the offset */
1656 index.i[0] += indir_index.i[0];
1657 index.i[1] += indir_index.i[1];
1658 index.i[2] += indir_index.i[2];
1659 index.i[3] += indir_index.i[3];
1660
1661 /* for disabled execution channels, zero-out the index to
1662 * avoid using a potential garbage value.
1663 */
1664 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1665 if ((execmask & (1 << i)) == 0)
1666 index.i[i] = 0;
1667 }
1668 }
1669
1670 /* There is an extra source register that is a second
1671 * subscript to a register file. Effectively it means that
1672 * the register file is actually a 2D array of registers.
1673 *
1674 * file[3][1],
1675 * where:
1676 * [3] = Dimension.Index
1677 */
1678 if (reg->Register.Dimension) {
1679 index2D.i[0] =
1680 index2D.i[1] =
1681 index2D.i[2] =
1682 index2D.i[3] = reg->Dimension.Index;
1683
1684 /* Again, the second subscript index can be addressed indirectly
1685 * identically to the first one.
1686 * Nothing stops us from indirectly addressing the indirect register,
1687 * but there is no need for that, so we won't exercise it.
1688 *
1689 * file[ind[4].y+3][1],
1690 * where:
1691 * ind = DimIndirect.File
1692 * [4] = DimIndirect.Index
1693 * .y = DimIndirect.SwizzleX
1694 */
1695 if (reg->Dimension.Indirect) {
1696 union tgsi_exec_channel index2;
1697 union tgsi_exec_channel indir_index;
1698 const uint execmask = mach->ExecMask;
1699 uint i;
1700
1701 index2.i[0] =
1702 index2.i[1] =
1703 index2.i[2] =
1704 index2.i[3] = reg->DimIndirect.Index;
1705
1706 swizzle = reg->DimIndirect.Swizzle;
1707 fetch_src_file_channel(mach,
1708 chan_index,
1709 reg->DimIndirect.File,
1710 swizzle,
1711 &index2,
1712 &ZeroVec,
1713 &indir_index);
1714
1715 index2D.i[0] += indir_index.i[0];
1716 index2D.i[1] += indir_index.i[1];
1717 index2D.i[2] += indir_index.i[2];
1718 index2D.i[3] += indir_index.i[3];
1719
1720 /* for disabled execution channels, zero-out the index to
1721 * avoid using a potential garbage value.
1722 */
1723 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1724 if ((execmask & (1 << i)) == 0) {
1725 index2D.i[i] = 0;
1726 }
1727 }
1728 }
1729
1730 /* If by any chance there was a need for a 3D array of register
1731 * files, we would have to check whether Dimension is followed
1732 * by a dimension register and continue the saga.
1733 */
1734 } else {
1735 index2D.i[0] =
1736 index2D.i[1] =
1737 index2D.i[2] =
1738 index2D.i[3] = 0;
1739 }
1740
1741 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1742 fetch_src_file_channel(mach,
1743 chan_index,
1744 reg->Register.File,
1745 swizzle,
1746 &index,
1747 &index2D,
1748 chan);
1749 }
1750
1751 static void
fetch_source(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index,enum tgsi_exec_datatype src_datatype)1752 fetch_source(const struct tgsi_exec_machine *mach,
1753 union tgsi_exec_channel *chan,
1754 const struct tgsi_full_src_register *reg,
1755 const uint chan_index,
1756 enum tgsi_exec_datatype src_datatype)
1757 {
1758 fetch_source_d(mach, chan, reg, chan_index, src_datatype);
1759
1760 if (reg->Register.Absolute) {
1761 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1762 micro_abs(chan, chan);
1763 } else {
1764 micro_iabs(chan, chan);
1765 }
1766 }
1767
1768 if (reg->Register.Negate) {
1769 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1770 micro_neg(chan, chan);
1771 } else {
1772 micro_ineg(chan, chan);
1773 }
1774 }
1775 }
1776
1777 static union tgsi_exec_channel *
store_dest_dstret(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index,enum tgsi_exec_datatype dst_datatype)1778 store_dest_dstret(struct tgsi_exec_machine *mach,
1779 const union tgsi_exec_channel *chan,
1780 const struct tgsi_full_dst_register *reg,
1781 const struct tgsi_full_instruction *inst,
1782 uint chan_index,
1783 enum tgsi_exec_datatype dst_datatype)
1784 {
1785 uint i;
1786 static union tgsi_exec_channel null;
1787 union tgsi_exec_channel *dst;
1788 union tgsi_exec_channel index2D;
1789 uint execmask = mach->ExecMask;
1790 int offset = 0; /* indirection offset */
1791 int index;
1792
1793 /* for debugging */
1794 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
1795 check_inf_or_nan(chan);
1796 }
1797
1798 /* There is an extra source register that indirectly subscripts
1799 * a register file. The direct index now becomes an offset
1800 * that is being added to the indirect register.
1801 *
1802 * file[ind[2].x+1],
1803 * where:
1804 * ind = Indirect.File
1805 * [2] = Indirect.Index
1806 * .x = Indirect.SwizzleX
1807 */
1808 if (reg->Register.Indirect) {
1809 union tgsi_exec_channel index;
1810 union tgsi_exec_channel indir_index;
1811 uint swizzle;
1812
1813 /* which address register (always zero for now) */
1814 index.i[0] =
1815 index.i[1] =
1816 index.i[2] =
1817 index.i[3] = reg->Indirect.Index;
1818
1819 /* get current value of address register[swizzle] */
1820 swizzle = reg->Indirect.Swizzle;
1821
1822 /* fetch values from the address/indirection register */
1823 fetch_src_file_channel(mach,
1824 chan_index,
1825 reg->Indirect.File,
1826 swizzle,
1827 &index,
1828 &ZeroVec,
1829 &indir_index);
1830
1831 /* save indirection offset */
1832 offset = indir_index.i[0];
1833 }
1834
1835 /* There is an extra source register that is a second
1836 * subscript to a register file. Effectively it means that
1837 * the register file is actually a 2D array of registers.
1838 *
1839 * file[3][1],
1840 * where:
1841 * [3] = Dimension.Index
1842 */
1843 if (reg->Register.Dimension) {
1844 index2D.i[0] =
1845 index2D.i[1] =
1846 index2D.i[2] =
1847 index2D.i[3] = reg->Dimension.Index;
1848
1849 /* Again, the second subscript index can be addressed indirectly
1850 * identically to the first one.
1851 * Nothing stops us from indirectly addressing the indirect register,
1852 * but there is no need for that, so we won't exercise it.
1853 *
1854 * file[ind[4].y+3][1],
1855 * where:
1856 * ind = DimIndirect.File
1857 * [4] = DimIndirect.Index
1858 * .y = DimIndirect.SwizzleX
1859 */
1860 if (reg->Dimension.Indirect) {
1861 union tgsi_exec_channel index2;
1862 union tgsi_exec_channel indir_index;
1863 const uint execmask = mach->ExecMask;
1864 unsigned swizzle;
1865 uint i;
1866
1867 index2.i[0] =
1868 index2.i[1] =
1869 index2.i[2] =
1870 index2.i[3] = reg->DimIndirect.Index;
1871
1872 swizzle = reg->DimIndirect.Swizzle;
1873 fetch_src_file_channel(mach,
1874 chan_index,
1875 reg->DimIndirect.File,
1876 swizzle,
1877 &index2,
1878 &ZeroVec,
1879 &indir_index);
1880
1881 index2D.i[0] += indir_index.i[0];
1882 index2D.i[1] += indir_index.i[1];
1883 index2D.i[2] += indir_index.i[2];
1884 index2D.i[3] += indir_index.i[3];
1885
1886 /* for disabled execution channels, zero-out the index to
1887 * avoid using a potential garbage value.
1888 */
1889 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1890 if ((execmask & (1 << i)) == 0) {
1891 index2D.i[i] = 0;
1892 }
1893 }
1894 }
1895
1896 /* If by any chance there was a need for a 3D array of register
1897 * files, we would have to check whether Dimension is followed
1898 * by a dimension register and continue the saga.
1899 */
1900 } else {
1901 index2D.i[0] =
1902 index2D.i[1] =
1903 index2D.i[2] =
1904 index2D.i[3] = 0;
1905 }
1906
1907 switch (reg->Register.File) {
1908 case TGSI_FILE_NULL:
1909 dst = &null;
1910 break;
1911
1912 case TGSI_FILE_OUTPUT:
1913 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
1914 + reg->Register.Index;
1915 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1916 #if 0
1917 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1918 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1919 reg->Register.Index);
1920 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1921 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1922 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1923 if (execmask & (1 << i))
1924 debug_printf("%f, ", chan->f[i]);
1925 debug_printf(")\n");
1926 }
1927 #endif
1928 break;
1929
1930 case TGSI_FILE_TEMPORARY:
1931 index = reg->Register.Index;
1932 assert( index < TGSI_EXEC_NUM_TEMPS );
1933 dst = &mach->Temps[offset + index].xyzw[chan_index];
1934 break;
1935
1936 case TGSI_FILE_ADDRESS:
1937 index = reg->Register.Index;
1938 dst = &mach->Addrs[index].xyzw[chan_index];
1939 break;
1940
1941 case TGSI_FILE_PREDICATE:
1942 index = reg->Register.Index;
1943 assert(index < TGSI_EXEC_NUM_PREDS);
1944 dst = &mach->Predicates[index].xyzw[chan_index];
1945 break;
1946
1947 default:
1948 assert( 0 );
1949 return NULL;
1950 }
1951
1952 if (inst->Instruction.Predicate) {
1953 uint swizzle;
1954 union tgsi_exec_channel *pred;
1955
1956 switch (chan_index) {
1957 case TGSI_CHAN_X:
1958 swizzle = inst->Predicate.SwizzleX;
1959 break;
1960 case TGSI_CHAN_Y:
1961 swizzle = inst->Predicate.SwizzleY;
1962 break;
1963 case TGSI_CHAN_Z:
1964 swizzle = inst->Predicate.SwizzleZ;
1965 break;
1966 case TGSI_CHAN_W:
1967 swizzle = inst->Predicate.SwizzleW;
1968 break;
1969 default:
1970 assert(0);
1971 return NULL;
1972 }
1973
1974 assert(inst->Predicate.Index == 0);
1975
1976 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle];
1977
1978 if (inst->Predicate.Negate) {
1979 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1980 if (pred->u[i]) {
1981 execmask &= ~(1 << i);
1982 }
1983 }
1984 } else {
1985 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1986 if (!pred->u[i]) {
1987 execmask &= ~(1 << i);
1988 }
1989 }
1990 }
1991 }
1992
1993 return dst;
1994 }
1995
1996 static void
store_dest_double(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index,enum tgsi_exec_datatype dst_datatype)1997 store_dest_double(struct tgsi_exec_machine *mach,
1998 const union tgsi_exec_channel *chan,
1999 const struct tgsi_full_dst_register *reg,
2000 const struct tgsi_full_instruction *inst,
2001 uint chan_index,
2002 enum tgsi_exec_datatype dst_datatype)
2003 {
2004 union tgsi_exec_channel *dst;
2005 const uint execmask = mach->ExecMask;
2006 int i;
2007
2008 dst = store_dest_dstret(mach, chan, reg, inst, chan_index,
2009 dst_datatype);
2010 if (!dst)
2011 return;
2012
2013 /* doubles path */
2014 for (i = 0; i < TGSI_QUAD_SIZE; i++)
2015 if (execmask & (1 << i))
2016 dst->i[i] = chan->i[i];
2017 }
2018
2019 static void
store_dest(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index,enum tgsi_exec_datatype dst_datatype)2020 store_dest(struct tgsi_exec_machine *mach,
2021 const union tgsi_exec_channel *chan,
2022 const struct tgsi_full_dst_register *reg,
2023 const struct tgsi_full_instruction *inst,
2024 uint chan_index,
2025 enum tgsi_exec_datatype dst_datatype)
2026 {
2027 union tgsi_exec_channel *dst;
2028 const uint execmask = mach->ExecMask;
2029 int i;
2030
2031 dst = store_dest_dstret(mach, chan, reg, inst, chan_index,
2032 dst_datatype);
2033 if (!dst)
2034 return;
2035
2036 if (!inst->Instruction.Saturate) {
2037 for (i = 0; i < TGSI_QUAD_SIZE; i++)
2038 if (execmask & (1 << i))
2039 dst->i[i] = chan->i[i];
2040 }
2041 else {
2042 for (i = 0; i < TGSI_QUAD_SIZE; i++)
2043 if (execmask & (1 << i)) {
2044 if (chan->f[i] < 0.0f)
2045 dst->f[i] = 0.0f;
2046 else if (chan->f[i] > 1.0f)
2047 dst->f[i] = 1.0f;
2048 else
2049 dst->i[i] = chan->i[i];
2050 }
2051 }
2052 }
2053
/*
 * Convenience wrappers around fetch_source() for source operand INDEX of
 * the current instruction.  Both expand to a call that references `mach`
 * and `inst`, so those names must be in scope at the point of use.
 *
 * FETCH  reads channel CHAN as float data into *VAL.
 * IFETCH reads channel CHAN as integer data into *VAL.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), \
                TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), \
                TGSI_EXEC_DATA_INT)
2059
2060
2061 /**
2062 * Execute ARB-style KIL which is predicated by a src register.
2063 * Kill fragment if any of the four values is less than zero.
2064 */
2065 static void
exec_kill_if(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2066 exec_kill_if(struct tgsi_exec_machine *mach,
2067 const struct tgsi_full_instruction *inst)
2068 {
2069 uint uniquemask;
2070 uint chan_index;
2071 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2072 union tgsi_exec_channel r[1];
2073
2074 /* This mask stores component bits that were already tested. */
2075 uniquemask = 0;
2076
2077 for (chan_index = 0; chan_index < 4; chan_index++)
2078 {
2079 uint swizzle;
2080 uint i;
2081
2082 /* unswizzle channel */
2083 swizzle = tgsi_util_get_full_src_register_swizzle (
2084 &inst->Src[0],
2085 chan_index);
2086
2087 /* check if the component has not been already tested */
2088 if (uniquemask & (1 << swizzle))
2089 continue;
2090 uniquemask |= 1 << swizzle;
2091
2092 FETCH(&r[0], 0, chan_index);
2093 for (i = 0; i < 4; i++)
2094 if (r[0].f[i] < 0.0f)
2095 kilmask |= 1 << i;
2096 }
2097
2098 /* restrict to fragments currently executing */
2099 kilmask &= mach->ExecMask;
2100
2101 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2102 }
2103
2104 /**
2105 * Unconditional fragment kill/discard.
2106 */
2107 static void
exec_kill(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2108 exec_kill(struct tgsi_exec_machine *mach,
2109 const struct tgsi_full_instruction *inst)
2110 {
2111 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
2112
2113 /* kill fragment for all fragments currently executing */
2114 kilmask = mach->ExecMask;
2115 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
2116 }
2117
2118 static void
emit_vertex(struct tgsi_exec_machine * mach)2119 emit_vertex(struct tgsi_exec_machine *mach)
2120 {
2121 /* FIXME: check for exec mask correctly
2122 unsigned i;
2123 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2124 if ((mach->ExecMask & (1 << i)))
2125 */
2126 if (mach->ExecMask) {
2127 if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices)
2128 return;
2129
2130 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
2131 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
2132 }
2133 }
2134
2135 static void
emit_primitive(struct tgsi_exec_machine * mach)2136 emit_primitive(struct tgsi_exec_machine *mach)
2137 {
2138 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
2139 /* FIXME: check for exec mask correctly
2140 unsigned i;
2141 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
2142 if ((mach->ExecMask & (1 << i)))
2143 */
2144 if (mach->ExecMask) {
2145 ++(*prim_count);
2146 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
2147 mach->Primitives[*prim_count] = 0;
2148 }
2149 }
2150
2151 static void
conditional_emit_primitive(struct tgsi_exec_machine * mach)2152 conditional_emit_primitive(struct tgsi_exec_machine *mach)
2153 {
2154 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
2155 int emitted_verts =
2156 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
2157 if (emitted_verts) {
2158 emit_primitive(mach);
2159 }
2160 }
2161 }
2162
2163
2164 /*
2165 * Fetch four texture samples using STR texture coordinates.
2166 */
2167 static void
fetch_texel(struct tgsi_sampler * sampler,const unsigned sview_idx,const unsigned sampler_idx,const union tgsi_exec_channel * s,const union tgsi_exec_channel * t,const union tgsi_exec_channel * p,const union tgsi_exec_channel * c0,const union tgsi_exec_channel * c1,float derivs[3][2][TGSI_QUAD_SIZE],const int8_t offset[3],enum tgsi_sampler_control control,union tgsi_exec_channel * r,union tgsi_exec_channel * g,union tgsi_exec_channel * b,union tgsi_exec_channel * a)2168 fetch_texel( struct tgsi_sampler *sampler,
2169 const unsigned sview_idx,
2170 const unsigned sampler_idx,
2171 const union tgsi_exec_channel *s,
2172 const union tgsi_exec_channel *t,
2173 const union tgsi_exec_channel *p,
2174 const union tgsi_exec_channel *c0,
2175 const union tgsi_exec_channel *c1,
2176 float derivs[3][2][TGSI_QUAD_SIZE],
2177 const int8_t offset[3],
2178 enum tgsi_sampler_control control,
2179 union tgsi_exec_channel *r,
2180 union tgsi_exec_channel *g,
2181 union tgsi_exec_channel *b,
2182 union tgsi_exec_channel *a )
2183 {
2184 uint j;
2185 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2186
2187 /* FIXME: handle explicit derivs, offsets */
2188 sampler->get_samples(sampler, sview_idx, sampler_idx,
2189 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
2190
2191 for (j = 0; j < 4; j++) {
2192 r->f[j] = rgba[0][j];
2193 g->f[j] = rgba[1][j];
2194 b->f[j] = rgba[2][j];
2195 a->f[j] = rgba[3][j];
2196 }
2197 }
2198
2199
/*
 * Texture instruction variants, passed as the `modifier` argument of
 * exec_tex() and exec_sample():
 *
 *   NONE          no extra modifier operand is fetched
 *   PROJECTED     exec_tex() divides the coordinates by the fetched value
 *   LOD_BIAS      sampled with TGSI_SAMPLER_LOD_BIAS
 *   EXPLICIT_LOD  sampled with TGSI_SAMPLER_LOD_EXPLICIT
 *   LEVEL_ZERO    exec_sample() samples with TGSI_SAMPLER_LOD_ZERO;
 *                 exec_tex() asserts it is never passed this value
 *   GATHER        exec_tex() samples with TGSI_SAMPLER_GATHER
 */
2200 #define TEX_MODIFIER_NONE 0
2201 #define TEX_MODIFIER_PROJECTED 1
2202 #define TEX_MODIFIER_LOD_BIAS 2
2203 #define TEX_MODIFIER_EXPLICIT_LOD 3
2204 #define TEX_MODIFIER_LEVEL_ZERO 4
2205 #define TEX_MODIFIER_GATHER 5
2206
2207 /*
2208 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2209 */
2210 static void
fetch_texel_offsets(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int8_t offsets[3])2211 fetch_texel_offsets(struct tgsi_exec_machine *mach,
2212 const struct tgsi_full_instruction *inst,
2213 int8_t offsets[3])
2214 {
2215 if (inst->Texture.NumOffsets == 1) {
2216 union tgsi_exec_channel index;
2217 union tgsi_exec_channel offset[3];
2218 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2219 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2220 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2221 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2222 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2223 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File,
2224 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2225 offsets[0] = offset[0].i[0];
2226 offsets[1] = offset[1].i[0];
2227 offsets[2] = offset[2].i[0];
2228 } else {
2229 assert(inst->Texture.NumOffsets == 0);
2230 offsets[0] = offsets[1] = offsets[2] = 0;
2231 }
2232 }
2233
2234
2235 /*
2236 * Fetch dx and dy values for one channel (s, t or r).
2237 * Put dx values into one float array, dy values into another.
2238 */
2239 static void
fetch_assign_deriv_channel(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,unsigned regdsrcx,unsigned chan,float derivs[2][TGSI_QUAD_SIZE])2240 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2241 const struct tgsi_full_instruction *inst,
2242 unsigned regdsrcx,
2243 unsigned chan,
2244 float derivs[2][TGSI_QUAD_SIZE])
2245 {
2246 union tgsi_exec_channel d;
2247 FETCH(&d, regdsrcx, chan);
2248 derivs[0][0] = d.f[0];
2249 derivs[0][1] = d.f[1];
2250 derivs[0][2] = d.f[2];
2251 derivs[0][3] = d.f[3];
2252 FETCH(&d, regdsrcx + 1, chan);
2253 derivs[1][0] = d.f[0];
2254 derivs[1][1] = d.f[1];
2255 derivs[1][2] = d.f[2];
2256 derivs[1][3] = d.f[3];
2257 }
2258
2259 static uint
fetch_sampler_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint sampler)2260 fetch_sampler_unit(struct tgsi_exec_machine *mach,
2261 const struct tgsi_full_instruction *inst,
2262 uint sampler)
2263 {
2264 uint unit = 0;
2265 int i;
2266 if (inst->Src[sampler].Register.Indirect) {
2267 const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2268 union tgsi_exec_channel indir_index, index2;
2269 const uint execmask = mach->ExecMask;
2270 index2.i[0] =
2271 index2.i[1] =
2272 index2.i[2] =
2273 index2.i[3] = reg->Indirect.Index;
2274
2275 fetch_src_file_channel(mach,
2276 0,
2277 reg->Indirect.File,
2278 reg->Indirect.Swizzle,
2279 &index2,
2280 &ZeroVec,
2281 &indir_index);
2282 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2283 if (execmask & (1 << i)) {
2284 unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2285 break;
2286 }
2287 }
2288
2289 } else {
2290 unit = inst->Src[sampler].Register.Index;
2291 }
2292 return unit;
2293 }
2294
2295 /*
2296 * execute a texture instruction.
2297 *
2298 * modifier is used to control the channel routing for the
2299 * instruction variants like proj, lod, and texture with lod bias.
2300 * sampler indicates which src register the sampler is contained in.
2301 */
2302 static void
exec_tex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,uint sampler)2303 exec_tex(struct tgsi_exec_machine *mach,
2304 const struct tgsi_full_instruction *inst,
2305 uint modifier, uint sampler)
2306 {
2307 const union tgsi_exec_channel *args[5], *proj = NULL;
2308 union tgsi_exec_channel r[5];
2309 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2310 uint chan;
2311 uint unit;
2312 int8_t offsets[3];
2313 int dim, shadow_ref, i;
2314
2315 unit = fetch_sampler_unit(mach, inst, sampler);
2316 /* always fetch all 3 offsets, overkill but keeps code simple */
2317 fetch_texel_offsets(mach, inst, offsets);
2318
2319 assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2320 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2321
2322 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2323 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2324
2325 assert(dim <= 4);
2326 if (shadow_ref >= 0)
2327 assert(shadow_ref >= dim && shadow_ref < ARRAY_SIZE(args));
2328
2329 /* fetch modifier to the last argument */
2330 if (modifier != TEX_MODIFIER_NONE) {
2331 const int last = ARRAY_SIZE(args) - 1;
2332
2333 /* fetch modifier from src0.w or src1.x */
2334 if (sampler == 1) {
2335 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2336 FETCH(&r[last], 0, TGSI_CHAN_W);
2337 }
2338 else {
2339 assert(shadow_ref != 4);
2340 FETCH(&r[last], 1, TGSI_CHAN_X);
2341 }
2342
2343 if (modifier != TEX_MODIFIER_PROJECTED) {
2344 args[last] = &r[last];
2345 }
2346 else {
2347 proj = &r[last];
2348 args[last] = &ZeroVec;
2349 }
2350
2351 /* point unused arguments to zero vector */
2352 for (i = dim; i < last; i++)
2353 args[i] = &ZeroVec;
2354
2355 if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2356 control = TGSI_SAMPLER_LOD_EXPLICIT;
2357 else if (modifier == TEX_MODIFIER_LOD_BIAS)
2358 control = TGSI_SAMPLER_LOD_BIAS;
2359 else if (modifier == TEX_MODIFIER_GATHER)
2360 control = TGSI_SAMPLER_GATHER;
2361 }
2362 else {
2363 for (i = dim; i < ARRAY_SIZE(args); i++)
2364 args[i] = &ZeroVec;
2365 }
2366
2367 /* fetch coordinates */
2368 for (i = 0; i < dim; i++) {
2369 FETCH(&r[i], 0, TGSI_CHAN_X + i);
2370
2371 if (proj)
2372 micro_div(&r[i], &r[i], proj);
2373
2374 args[i] = &r[i];
2375 }
2376
2377 /* fetch reference value */
2378 if (shadow_ref >= 0) {
2379 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2380
2381 if (proj)
2382 micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2383
2384 args[shadow_ref] = &r[shadow_ref];
2385 }
2386
2387 fetch_texel(mach->Sampler, unit, unit,
2388 args[0], args[1], args[2], args[3], args[4],
2389 NULL, offsets, control,
2390 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2391
2392 #if 0
2393 debug_printf("fetch r: %g %g %g %g\n",
2394 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2395 debug_printf("fetch g: %g %g %g %g\n",
2396 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2397 debug_printf("fetch b: %g %g %g %g\n",
2398 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2399 debug_printf("fetch a: %g %g %g %g\n",
2400 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2401 #endif
2402
2403 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2404 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2405 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2406 }
2407 }
2408 }
2409
2410 static void
exec_lodq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2411 exec_lodq(struct tgsi_exec_machine *mach,
2412 const struct tgsi_full_instruction *inst)
2413 {
2414 uint unit;
2415 int dim;
2416 int i;
2417 union tgsi_exec_channel coords[4];
2418 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2419 union tgsi_exec_channel r[2];
2420
2421 unit = fetch_sampler_unit(mach, inst, 1);
2422 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2423 assert(dim <= ARRAY_SIZE(coords));
2424 /* fetch coordinates */
2425 for (i = 0; i < dim; i++) {
2426 FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2427 args[i] = &coords[i];
2428 }
2429 for (i = dim; i < ARRAY_SIZE(coords); i++) {
2430 args[i] = &ZeroVec;
2431 }
2432 mach->Sampler->query_lod(mach->Sampler, unit, unit,
2433 args[0]->f,
2434 args[1]->f,
2435 args[2]->f,
2436 args[3]->f,
2437 TGSI_SAMPLER_LOD_NONE,
2438 r[0].f,
2439 r[1].f);
2440
2441 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2442 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
2443 TGSI_EXEC_DATA_FLOAT);
2444 }
2445 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2446 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
2447 TGSI_EXEC_DATA_FLOAT);
2448 }
2449 }
2450
2451 static void
exec_txd(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2452 exec_txd(struct tgsi_exec_machine *mach,
2453 const struct tgsi_full_instruction *inst)
2454 {
2455 union tgsi_exec_channel r[4];
2456 float derivs[3][2][TGSI_QUAD_SIZE];
2457 uint chan;
2458 uint unit;
2459 int8_t offsets[3];
2460
2461 unit = fetch_sampler_unit(mach, inst, 3);
2462 /* always fetch all 3 offsets, overkill but keeps code simple */
2463 fetch_texel_offsets(mach, inst, offsets);
2464
2465 switch (inst->Texture.Texture) {
2466 case TGSI_TEXTURE_1D:
2467 FETCH(&r[0], 0, TGSI_CHAN_X);
2468
2469 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2470
2471 fetch_texel(mach->Sampler, unit, unit,
2472 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2473 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2474 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2475 break;
2476
2477 case TGSI_TEXTURE_SHADOW1D:
2478 case TGSI_TEXTURE_1D_ARRAY:
2479 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2480 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2481 FETCH(&r[0], 0, TGSI_CHAN_X);
2482 FETCH(&r[1], 0, TGSI_CHAN_Y);
2483 FETCH(&r[2], 0, TGSI_CHAN_Z);
2484
2485 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2486
2487 fetch_texel(mach->Sampler, unit, unit,
2488 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2489 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2490 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2491 break;
2492
2493 case TGSI_TEXTURE_2D:
2494 case TGSI_TEXTURE_RECT:
2495 FETCH(&r[0], 0, TGSI_CHAN_X);
2496 FETCH(&r[1], 0, TGSI_CHAN_Y);
2497
2498 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2499 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2500
2501 fetch_texel(mach->Sampler, unit, unit,
2502 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2503 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2504 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2505 break;
2506
2507
2508 case TGSI_TEXTURE_SHADOW2D:
2509 case TGSI_TEXTURE_SHADOWRECT:
2510 case TGSI_TEXTURE_2D_ARRAY:
2511 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2512 /* only SHADOW2D_ARRAY actually needs W */
2513 FETCH(&r[0], 0, TGSI_CHAN_X);
2514 FETCH(&r[1], 0, TGSI_CHAN_Y);
2515 FETCH(&r[2], 0, TGSI_CHAN_Z);
2516 FETCH(&r[3], 0, TGSI_CHAN_W);
2517
2518 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2519 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2520
2521 fetch_texel(mach->Sampler, unit, unit,
2522 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2523 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2524 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2525 break;
2526
2527 case TGSI_TEXTURE_3D:
2528 case TGSI_TEXTURE_CUBE:
2529 case TGSI_TEXTURE_CUBE_ARRAY:
2530 case TGSI_TEXTURE_SHADOWCUBE:
2531 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2532 FETCH(&r[0], 0, TGSI_CHAN_X);
2533 FETCH(&r[1], 0, TGSI_CHAN_Y);
2534 FETCH(&r[2], 0, TGSI_CHAN_Z);
2535 FETCH(&r[3], 0, TGSI_CHAN_W);
2536
2537 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2538 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2539 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2540
2541 fetch_texel(mach->Sampler, unit, unit,
2542 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2543 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2544 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2545 break;
2546
2547 default:
2548 assert(0);
2549 }
2550
2551 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2552 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2553 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2554 }
2555 }
2556 }
2557
2558
2559 static void
exec_txf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2560 exec_txf(struct tgsi_exec_machine *mach,
2561 const struct tgsi_full_instruction *inst)
2562 {
2563 union tgsi_exec_channel r[4];
2564 uint chan;
2565 uint unit;
2566 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2567 int j;
2568 int8_t offsets[3];
2569 unsigned target;
2570
2571 unit = fetch_sampler_unit(mach, inst, 1);
2572 /* always fetch all 3 offsets, overkill but keeps code simple */
2573 fetch_texel_offsets(mach, inst, offsets);
2574
2575 IFETCH(&r[3], 0, TGSI_CHAN_W);
2576
2577 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2578 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2579 target = mach->SamplerViews[unit].Resource;
2580 }
2581 else {
2582 target = inst->Texture.Texture;
2583 }
2584 switch(target) {
2585 case TGSI_TEXTURE_3D:
2586 case TGSI_TEXTURE_2D_ARRAY:
2587 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2588 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2589 IFETCH(&r[2], 0, TGSI_CHAN_Z);
2590 /* fallthrough */
2591 case TGSI_TEXTURE_2D:
2592 case TGSI_TEXTURE_RECT:
2593 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2594 case TGSI_TEXTURE_SHADOW2D:
2595 case TGSI_TEXTURE_SHADOWRECT:
2596 case TGSI_TEXTURE_1D_ARRAY:
2597 case TGSI_TEXTURE_2D_MSAA:
2598 IFETCH(&r[1], 0, TGSI_CHAN_Y);
2599 /* fallthrough */
2600 case TGSI_TEXTURE_BUFFER:
2601 case TGSI_TEXTURE_1D:
2602 case TGSI_TEXTURE_SHADOW1D:
2603 IFETCH(&r[0], 0, TGSI_CHAN_X);
2604 break;
2605 default:
2606 assert(0);
2607 break;
2608 }
2609
2610 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2611 offsets, rgba);
2612
2613 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2614 r[0].f[j] = rgba[0][j];
2615 r[1].f[j] = rgba[1][j];
2616 r[2].f[j] = rgba[2][j];
2617 r[3].f[j] = rgba[3][j];
2618 }
2619
2620 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2621 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2622 unsigned char swizzles[4];
2623 swizzles[0] = inst->Src[1].Register.SwizzleX;
2624 swizzles[1] = inst->Src[1].Register.SwizzleY;
2625 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2626 swizzles[3] = inst->Src[1].Register.SwizzleW;
2627
2628 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2629 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2630 store_dest(mach, &r[swizzles[chan]],
2631 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2632 }
2633 }
2634 }
2635 else {
2636 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2637 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2638 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2639 }
2640 }
2641 }
2642 }
2643
2644 static void
exec_txq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2645 exec_txq(struct tgsi_exec_machine *mach,
2646 const struct tgsi_full_instruction *inst)
2647 {
2648 int result[4];
2649 union tgsi_exec_channel r[4], src;
2650 uint chan;
2651 uint unit;
2652 int i,j;
2653
2654 unit = fetch_sampler_unit(mach, inst, 1);
2655
2656 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2657
2658 /* XXX: This interface can't return per-pixel values */
2659 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2660
2661 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2662 for (j = 0; j < 4; j++) {
2663 r[j].i[i] = result[j];
2664 }
2665 }
2666
2667 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2668 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2669 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
2670 TGSI_EXEC_DATA_INT);
2671 }
2672 }
2673 }
2674
2675 static void
exec_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,boolean compare)2676 exec_sample(struct tgsi_exec_machine *mach,
2677 const struct tgsi_full_instruction *inst,
2678 uint modifier, boolean compare)
2679 {
2680 const uint resource_unit = inst->Src[1].Register.Index;
2681 const uint sampler_unit = inst->Src[2].Register.Index;
2682 union tgsi_exec_channel r[5], c1;
2683 const union tgsi_exec_channel *lod = &ZeroVec;
2684 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2685 uint chan;
2686 unsigned char swizzles[4];
2687 int8_t offsets[3];
2688
2689 /* always fetch all 3 offsets, overkill but keeps code simple */
2690 fetch_texel_offsets(mach, inst, offsets);
2691
2692 assert(modifier != TEX_MODIFIER_PROJECTED);
2693
2694 if (modifier != TEX_MODIFIER_NONE) {
2695 if (modifier == TEX_MODIFIER_LOD_BIAS) {
2696 FETCH(&c1, 3, TGSI_CHAN_X);
2697 lod = &c1;
2698 control = TGSI_SAMPLER_LOD_BIAS;
2699 }
2700 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2701 FETCH(&c1, 3, TGSI_CHAN_X);
2702 lod = &c1;
2703 control = TGSI_SAMPLER_LOD_EXPLICIT;
2704 }
2705 else {
2706 assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2707 control = TGSI_SAMPLER_LOD_ZERO;
2708 }
2709 }
2710
2711 FETCH(&r[0], 0, TGSI_CHAN_X);
2712
2713 switch (mach->SamplerViews[resource_unit].Resource) {
2714 case TGSI_TEXTURE_1D:
2715 if (compare) {
2716 FETCH(&r[2], 3, TGSI_CHAN_X);
2717 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2718 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2719 NULL, offsets, control,
2720 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2721 }
2722 else {
2723 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2724 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2725 NULL, offsets, control,
2726 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2727 }
2728 break;
2729
2730 case TGSI_TEXTURE_1D_ARRAY:
2731 case TGSI_TEXTURE_2D:
2732 case TGSI_TEXTURE_RECT:
2733 FETCH(&r[1], 0, TGSI_CHAN_Y);
2734 if (compare) {
2735 FETCH(&r[2], 3, TGSI_CHAN_X);
2736 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2737 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2738 NULL, offsets, control,
2739 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2740 }
2741 else {
2742 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2743 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2744 NULL, offsets, control,
2745 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2746 }
2747 break;
2748
2749 case TGSI_TEXTURE_2D_ARRAY:
2750 case TGSI_TEXTURE_3D:
2751 case TGSI_TEXTURE_CUBE:
2752 FETCH(&r[1], 0, TGSI_CHAN_Y);
2753 FETCH(&r[2], 0, TGSI_CHAN_Z);
2754 if(compare) {
2755 FETCH(&r[3], 3, TGSI_CHAN_X);
2756 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2757 &r[0], &r[1], &r[2], &r[3], lod,
2758 NULL, offsets, control,
2759 &r[0], &r[1], &r[2], &r[3]);
2760 }
2761 else {
2762 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2763 &r[0], &r[1], &r[2], &ZeroVec, lod,
2764 NULL, offsets, control,
2765 &r[0], &r[1], &r[2], &r[3]);
2766 }
2767 break;
2768
2769 case TGSI_TEXTURE_CUBE_ARRAY:
2770 FETCH(&r[1], 0, TGSI_CHAN_Y);
2771 FETCH(&r[2], 0, TGSI_CHAN_Z);
2772 FETCH(&r[3], 0, TGSI_CHAN_W);
2773 if(compare) {
2774 FETCH(&r[4], 3, TGSI_CHAN_X);
2775 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2776 &r[0], &r[1], &r[2], &r[3], &r[4],
2777 NULL, offsets, control,
2778 &r[0], &r[1], &r[2], &r[3]);
2779 }
2780 else {
2781 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2782 &r[0], &r[1], &r[2], &r[3], lod,
2783 NULL, offsets, control,
2784 &r[0], &r[1], &r[2], &r[3]);
2785 }
2786 break;
2787
2788
2789 default:
2790 assert(0);
2791 }
2792
2793 swizzles[0] = inst->Src[1].Register.SwizzleX;
2794 swizzles[1] = inst->Src[1].Register.SwizzleY;
2795 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2796 swizzles[3] = inst->Src[1].Register.SwizzleW;
2797
2798 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2799 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2800 store_dest(mach, &r[swizzles[chan]],
2801 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2802 }
2803 }
2804 }
2805
2806 static void
exec_sample_d(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2807 exec_sample_d(struct tgsi_exec_machine *mach,
2808 const struct tgsi_full_instruction *inst)
2809 {
2810 const uint resource_unit = inst->Src[1].Register.Index;
2811 const uint sampler_unit = inst->Src[2].Register.Index;
2812 union tgsi_exec_channel r[4];
2813 float derivs[3][2][TGSI_QUAD_SIZE];
2814 uint chan;
2815 unsigned char swizzles[4];
2816 int8_t offsets[3];
2817
2818 /* always fetch all 3 offsets, overkill but keeps code simple */
2819 fetch_texel_offsets(mach, inst, offsets);
2820
2821 FETCH(&r[0], 0, TGSI_CHAN_X);
2822
2823 switch (mach->SamplerViews[resource_unit].Resource) {
2824 case TGSI_TEXTURE_1D:
2825 case TGSI_TEXTURE_1D_ARRAY:
2826 /* only 1D array actually needs Y */
2827 FETCH(&r[1], 0, TGSI_CHAN_Y);
2828
2829 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2830
2831 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2832 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2833 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2834 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2835 break;
2836
2837 case TGSI_TEXTURE_2D:
2838 case TGSI_TEXTURE_RECT:
2839 case TGSI_TEXTURE_2D_ARRAY:
2840 /* only 2D array actually needs Z */
2841 FETCH(&r[1], 0, TGSI_CHAN_Y);
2842 FETCH(&r[2], 0, TGSI_CHAN_Z);
2843
2844 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2845 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2846
2847 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2848 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
2849 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2850 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2851 break;
2852
2853 case TGSI_TEXTURE_3D:
2854 case TGSI_TEXTURE_CUBE:
2855 case TGSI_TEXTURE_CUBE_ARRAY:
2856 /* only cube array actually needs W */
2857 FETCH(&r[1], 0, TGSI_CHAN_Y);
2858 FETCH(&r[2], 0, TGSI_CHAN_Z);
2859 FETCH(&r[3], 0, TGSI_CHAN_W);
2860
2861 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2862 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2863 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2864
2865 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2866 &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2867 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2868 &r[0], &r[1], &r[2], &r[3]);
2869 break;
2870
2871 default:
2872 assert(0);
2873 }
2874
2875 swizzles[0] = inst->Src[1].Register.SwizzleX;
2876 swizzles[1] = inst->Src[1].Register.SwizzleY;
2877 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2878 swizzles[3] = inst->Src[1].Register.SwizzleW;
2879
2880 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2881 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2882 store_dest(mach, &r[swizzles[chan]],
2883 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
2884 }
2885 }
2886 }
2887
2888
2889 /**
2890 * Evaluate a constant-valued coefficient at the position of the
2891 * current quad.
2892 */
2893 static void
eval_constant_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2894 eval_constant_coef(
2895 struct tgsi_exec_machine *mach,
2896 unsigned attrib,
2897 unsigned chan )
2898 {
2899 unsigned i;
2900
2901 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2902 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2903 }
2904 }
2905
2906 /**
2907 * Evaluate a linear-valued coefficient at the position of the
2908 * current quad.
2909 */
2910 static void
eval_linear_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2911 eval_linear_coef(
2912 struct tgsi_exec_machine *mach,
2913 unsigned attrib,
2914 unsigned chan )
2915 {
2916 const float x = mach->QuadPos.xyzw[0].f[0];
2917 const float y = mach->QuadPos.xyzw[1].f[0];
2918 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2919 const float dady = mach->InterpCoefs[attrib].dady[chan];
2920 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2921 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2922 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2923 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2924 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2925 }
2926
2927 /**
2928 * Evaluate a perspective-valued coefficient at the position of the
2929 * current quad.
2930 */
2931 static void
eval_perspective_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2932 eval_perspective_coef(
2933 struct tgsi_exec_machine *mach,
2934 unsigned attrib,
2935 unsigned chan )
2936 {
2937 const float x = mach->QuadPos.xyzw[0].f[0];
2938 const float y = mach->QuadPos.xyzw[1].f[0];
2939 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2940 const float dady = mach->InterpCoefs[attrib].dady[chan];
2941 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2942 const float *w = mach->QuadPos.xyzw[3].f;
2943 /* divide by W here */
2944 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2945 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2946 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2947 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2948 }
2949
2950
/**
 * Signature shared by the eval_*_coef() attribute evaluators: fill one
 * channel of one input attribute for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2955
2956 static void
exec_declaration(struct tgsi_exec_machine * mach,const struct tgsi_full_declaration * decl)2957 exec_declaration(struct tgsi_exec_machine *mach,
2958 const struct tgsi_full_declaration *decl)
2959 {
2960 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2961 mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2962 return;
2963 }
2964
2965 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2966 if (decl->Declaration.File == TGSI_FILE_INPUT) {
2967 uint first, last, mask;
2968
2969 first = decl->Range.First;
2970 last = decl->Range.Last;
2971 mask = decl->Declaration.UsageMask;
2972
2973 /* XXX we could remove this special-case code since
2974 * mach->InterpCoefs[first].a0 should already have the
2975 * front/back-face value. But we should first update the
2976 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2977 * Then, we could remove the tgsi_exec_machine::Face field.
2978 */
2979 /* XXX make FACE a system value */
2980 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2981 uint i;
2982
2983 assert(decl->Semantic.Index == 0);
2984 assert(first == last);
2985
2986 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2987 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2988 }
2989 } else {
2990 eval_coef_func eval;
2991 uint i, j;
2992
2993 switch (decl->Interp.Interpolate) {
2994 case TGSI_INTERPOLATE_CONSTANT:
2995 eval = eval_constant_coef;
2996 break;
2997
2998 case TGSI_INTERPOLATE_LINEAR:
2999 eval = eval_linear_coef;
3000 break;
3001
3002 case TGSI_INTERPOLATE_PERSPECTIVE:
3003 eval = eval_perspective_coef;
3004 break;
3005
3006 case TGSI_INTERPOLATE_COLOR:
3007 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
3008 break;
3009
3010 default:
3011 assert(0);
3012 return;
3013 }
3014
3015 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3016 if (mask & (1 << j)) {
3017 for (i = first; i <= last; i++) {
3018 eval(mach, i, j);
3019 }
3020 }
3021 }
3022 }
3023
3024 if (DEBUG_EXECUTION) {
3025 uint i, j;
3026 for (i = first; i <= last; ++i) {
3027 debug_printf("IN[%2u] = ", i);
3028 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
3029 if (j > 0) {
3030 debug_printf(" ");
3031 }
3032 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
3033 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
3034 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
3035 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
3036 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
3037 }
3038 }
3039 }
3040 }
3041 }
3042
3043 }
3044
/** Per-channel operation taking one source channel and writing one result. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
3047
3048 static void
exec_scalar_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3049 exec_scalar_unary(struct tgsi_exec_machine *mach,
3050 const struct tgsi_full_instruction *inst,
3051 micro_unary_op op,
3052 enum tgsi_exec_datatype dst_datatype,
3053 enum tgsi_exec_datatype src_datatype)
3054 {
3055 unsigned int chan;
3056 union tgsi_exec_channel src;
3057 union tgsi_exec_channel dst;
3058
3059 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
3060 op(&dst, &src);
3061 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3062 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3063 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3064 }
3065 }
3066 }
3067
3068 static void
exec_vector_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3069 exec_vector_unary(struct tgsi_exec_machine *mach,
3070 const struct tgsi_full_instruction *inst,
3071 micro_unary_op op,
3072 enum tgsi_exec_datatype dst_datatype,
3073 enum tgsi_exec_datatype src_datatype)
3074 {
3075 unsigned int chan;
3076 struct tgsi_exec_vector dst;
3077
3078 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3079 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3080 union tgsi_exec_channel src;
3081
3082 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
3083 op(&dst.xyzw[chan], &src);
3084 }
3085 }
3086 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3087 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3088 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3089 }
3090 }
3091 }
3092
/** Per-channel operation taking two source channels and writing one result. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
3096
3097 static void
exec_scalar_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3098 exec_scalar_binary(struct tgsi_exec_machine *mach,
3099 const struct tgsi_full_instruction *inst,
3100 micro_binary_op op,
3101 enum tgsi_exec_datatype dst_datatype,
3102 enum tgsi_exec_datatype src_datatype)
3103 {
3104 unsigned int chan;
3105 union tgsi_exec_channel src[2];
3106 union tgsi_exec_channel dst;
3107
3108 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
3109 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
3110 op(&dst, &src[0], &src[1]);
3111 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3112 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3113 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype);
3114 }
3115 }
3116 }
3117
3118 static void
exec_vector_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3119 exec_vector_binary(struct tgsi_exec_machine *mach,
3120 const struct tgsi_full_instruction *inst,
3121 micro_binary_op op,
3122 enum tgsi_exec_datatype dst_datatype,
3123 enum tgsi_exec_datatype src_datatype)
3124 {
3125 unsigned int chan;
3126 struct tgsi_exec_vector dst;
3127
3128 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3129 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3130 union tgsi_exec_channel src[2];
3131
3132 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3133 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3134 op(&dst.xyzw[chan], &src[0], &src[1]);
3135 }
3136 }
3137 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3138 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3139 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3140 }
3141 }
3142 }
3143
/** Per-channel operation taking three source channels and writing one result. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
3148
3149 static void
exec_vector_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_trinary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3150 exec_vector_trinary(struct tgsi_exec_machine *mach,
3151 const struct tgsi_full_instruction *inst,
3152 micro_trinary_op op,
3153 enum tgsi_exec_datatype dst_datatype,
3154 enum tgsi_exec_datatype src_datatype)
3155 {
3156 unsigned int chan;
3157 struct tgsi_exec_vector dst;
3158
3159 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3160 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3161 union tgsi_exec_channel src[3];
3162
3163 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3164 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3165 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3166 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3167 }
3168 }
3169 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3170 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3171 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3172 }
3173 }
3174 }
3175
/** Per-channel operation taking four source channels and writing one result. */
typedef void (*micro_quaternary_op)(union tgsi_exec_channel *dst,
                                    const union tgsi_exec_channel *src0,
                                    const union tgsi_exec_channel *src1,
                                    const union tgsi_exec_channel *src2,
                                    const union tgsi_exec_channel *src3);
3181
3182 static void
exec_vector_quaternary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_quaternary_op op,enum tgsi_exec_datatype dst_datatype,enum tgsi_exec_datatype src_datatype)3183 exec_vector_quaternary(struct tgsi_exec_machine *mach,
3184 const struct tgsi_full_instruction *inst,
3185 micro_quaternary_op op,
3186 enum tgsi_exec_datatype dst_datatype,
3187 enum tgsi_exec_datatype src_datatype)
3188 {
3189 unsigned int chan;
3190 struct tgsi_exec_vector dst;
3191
3192 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3193 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3194 union tgsi_exec_channel src[4];
3195
3196 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3197 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3198 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3199 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3200 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3201 }
3202 }
3203 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3204 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3205 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype);
3206 }
3207 }
3208 }
3209
3210 static void
exec_dp3(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3211 exec_dp3(struct tgsi_exec_machine *mach,
3212 const struct tgsi_full_instruction *inst)
3213 {
3214 unsigned int chan;
3215 union tgsi_exec_channel arg[3];
3216
3217 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3218 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3219 micro_mul(&arg[2], &arg[0], &arg[1]);
3220
3221 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3222 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3223 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3224 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3225 }
3226
3227 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3228 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3229 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3230 }
3231 }
3232 }
3233
3234 static void
exec_dp4(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3235 exec_dp4(struct tgsi_exec_machine *mach,
3236 const struct tgsi_full_instruction *inst)
3237 {
3238 unsigned int chan;
3239 union tgsi_exec_channel arg[3];
3240
3241 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3242 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3243 micro_mul(&arg[2], &arg[0], &arg[1]);
3244
3245 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3246 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3247 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3248 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3249 }
3250
3251 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3252 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3253 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3254 }
3255 }
3256 }
3257
3258 static void
exec_dp2a(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3259 exec_dp2a(struct tgsi_exec_machine *mach,
3260 const struct tgsi_full_instruction *inst)
3261 {
3262 unsigned int chan;
3263 union tgsi_exec_channel arg[3];
3264
3265 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3266 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3267 micro_mul(&arg[2], &arg[0], &arg[1]);
3268
3269 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3270 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3271 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
3272
3273 fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3274 micro_add(&arg[0], &arg[0], &arg[1]);
3275
3276 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3277 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3278 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3279 }
3280 }
3281 }
3282
3283 static void
exec_dph(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3284 exec_dph(struct tgsi_exec_machine *mach,
3285 const struct tgsi_full_instruction *inst)
3286 {
3287 unsigned int chan;
3288 union tgsi_exec_channel arg[3];
3289
3290 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3291 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3292 micro_mul(&arg[2], &arg[0], &arg[1]);
3293
3294 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3295 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3296 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3297
3298 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3299 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3300 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]);
3301
3302 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3303 micro_add(&arg[0], &arg[0], &arg[1]);
3304
3305 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3306 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3307 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3308 }
3309 }
3310 }
3311
3312 static void
exec_dp2(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3313 exec_dp2(struct tgsi_exec_machine *mach,
3314 const struct tgsi_full_instruction *inst)
3315 {
3316 unsigned int chan;
3317 union tgsi_exec_channel arg[3];
3318
3319 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3320 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3321 micro_mul(&arg[2], &arg[0], &arg[1]);
3322
3323 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3324 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3325 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3326
3327 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3328 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3329 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3330 }
3331 }
3332 }
3333
3334 static void
exec_pk2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3335 exec_pk2h(struct tgsi_exec_machine *mach,
3336 const struct tgsi_full_instruction *inst)
3337 {
3338 unsigned chan;
3339 union tgsi_exec_channel arg[2], dst;
3340
3341 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3342 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3343 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3344 dst.u[chan] = util_float_to_half(arg[0].f[chan]) |
3345 (util_float_to_half(arg[1].f[chan]) << 16);
3346 }
3347 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3348 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3349 store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT);
3350 }
3351 }
3352 }
3353
3354 static void
exec_up2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3355 exec_up2h(struct tgsi_exec_machine *mach,
3356 const struct tgsi_full_instruction *inst)
3357 {
3358 unsigned chan;
3359 union tgsi_exec_channel arg, dst[2];
3360
3361 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3362 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3363 dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff);
3364 dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16);
3365 }
3366 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3367 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3368 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3369 }
3370 }
3371 }
3372
3373 static void
exec_scs(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3374 exec_scs(struct tgsi_exec_machine *mach,
3375 const struct tgsi_full_instruction *inst)
3376 {
3377 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) {
3378 union tgsi_exec_channel arg;
3379 union tgsi_exec_channel result;
3380
3381 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3382
3383 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3384 micro_cos(&result, &arg);
3385 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3386 }
3387 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3388 micro_sin(&result, &arg);
3389 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3390 }
3391 }
3392 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3393 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3394 }
3395 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3396 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3397 }
3398 }
3399
3400 static void
exec_xpd(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3401 exec_xpd(struct tgsi_exec_machine *mach,
3402 const struct tgsi_full_instruction *inst)
3403 {
3404 union tgsi_exec_channel r[6];
3405 union tgsi_exec_channel d[3];
3406
3407 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3408 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3409
3410 micro_mul(&r[2], &r[0], &r[1]);
3411
3412 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3413 fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3414
3415 micro_mul(&r[5], &r[3], &r[4] );
3416 micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]);
3417
3418 fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3419
3420 micro_mul(&r[3], &r[3], &r[2]);
3421
3422 fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3423
3424 micro_mul(&r[1], &r[1], &r[5]);
3425 micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]);
3426
3427 micro_mul(&r[5], &r[5], &r[4]);
3428 micro_mul(&r[0], &r[0], &r[2]);
3429 micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]);
3430
3431 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3432 store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3433 }
3434 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3435 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3436 }
3437 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3438 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3439 }
3440 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3441 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3442 }
3443 }
3444
3445 static void
exec_dst(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3446 exec_dst(struct tgsi_exec_machine *mach,
3447 const struct tgsi_full_instruction *inst)
3448 {
3449 union tgsi_exec_channel r[2];
3450 union tgsi_exec_channel d[4];
3451
3452 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3453 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3454 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3455 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3456 }
3457 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3458 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3459 }
3460 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3461 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3462 }
3463
3464 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3465 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3466 }
3467 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3468 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3469 }
3470 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3471 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3472 }
3473 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3474 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3475 }
3476 }
3477
3478 static void
exec_log(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3479 exec_log(struct tgsi_exec_machine *mach,
3480 const struct tgsi_full_instruction *inst)
3481 {
3482 union tgsi_exec_channel r[3];
3483
3484 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3485 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
3486 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
3487 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
3488 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3489 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3490 }
3491 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3492 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
3493 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3494 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3495 }
3496 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3497 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3498 }
3499 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3500 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3501 }
3502 }
3503
3504 static void
exec_exp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3505 exec_exp(struct tgsi_exec_machine *mach,
3506 const struct tgsi_full_instruction *inst)
3507 {
3508 union tgsi_exec_channel r[3];
3509
3510 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3511 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
3512 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3513 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
3514 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3515 }
3516 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3517 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3518 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3519 }
3520 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3521 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
3522 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3523 }
3524 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3525 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3526 }
3527 }
3528
3529 static void
exec_lit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3530 exec_lit(struct tgsi_exec_machine *mach,
3531 const struct tgsi_full_instruction *inst)
3532 {
3533 union tgsi_exec_channel r[3];
3534 union tgsi_exec_channel d[3];
3535
3536 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3537 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3538 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3539 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3540 micro_max(&r[1], &r[1], &ZeroVec);
3541
3542 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3543 micro_min(&r[2], &r[2], &P128Vec);
3544 micro_max(&r[2], &r[2], &M128Vec);
3545 micro_pow(&r[1], &r[1], &r[2]);
3546 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3547 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3548 }
3549 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3550 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3551 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3552 }
3553 }
3554 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3555 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3556 }
3557
3558 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3559 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3560 }
3561 }
3562
3563 static void
exec_break(struct tgsi_exec_machine * mach)3564 exec_break(struct tgsi_exec_machine *mach)
3565 {
3566 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3567 /* turn off loop channels for each enabled exec channel */
3568 mach->LoopMask &= ~mach->ExecMask;
3569 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3570 UPDATE_EXEC_MASK(mach);
3571 } else {
3572 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3573
3574 mach->Switch.mask = 0x0;
3575
3576 UPDATE_EXEC_MASK(mach);
3577 }
3578 }
3579
3580 static void
exec_switch(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3581 exec_switch(struct tgsi_exec_machine *mach,
3582 const struct tgsi_full_instruction *inst)
3583 {
3584 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3585 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3586
3587 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3588 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3589 mach->Switch.mask = 0x0;
3590 mach->Switch.defaultMask = 0x0;
3591
3592 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3593 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3594
3595 UPDATE_EXEC_MASK(mach);
3596 }
3597
3598 static void
exec_case(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3599 exec_case(struct tgsi_exec_machine *mach,
3600 const struct tgsi_full_instruction *inst)
3601 {
3602 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3603 union tgsi_exec_channel src;
3604 uint mask = 0;
3605
3606 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3607
3608 if (mach->Switch.selector.u[0] == src.u[0]) {
3609 mask |= 0x1;
3610 }
3611 if (mach->Switch.selector.u[1] == src.u[1]) {
3612 mask |= 0x2;
3613 }
3614 if (mach->Switch.selector.u[2] == src.u[2]) {
3615 mask |= 0x4;
3616 }
3617 if (mach->Switch.selector.u[3] == src.u[3]) {
3618 mask |= 0x8;
3619 }
3620
3621 mach->Switch.defaultMask |= mask;
3622
3623 mach->Switch.mask |= mask & prevMask;
3624
3625 UPDATE_EXEC_MASK(mach);
3626 }
3627
3628 /* FIXME: this will only work if default is last */
3629 static void
exec_default(struct tgsi_exec_machine * mach)3630 exec_default(struct tgsi_exec_machine *mach)
3631 {
3632 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3633
3634 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3635
3636 UPDATE_EXEC_MASK(mach);
3637 }
3638
3639 static void
exec_endswitch(struct tgsi_exec_machine * mach)3640 exec_endswitch(struct tgsi_exec_machine *mach)
3641 {
3642 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3643 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3644
3645 UPDATE_EXEC_MASK(mach);
3646 }
3647
/**
 * 64-bit micro-op producing a 64-bit result from 64-bit operand(s).
 * Callers in this file pass either a single operand or an array of operands
 * through the second argument.
 */
typedef void (*micro_dop)(union tgsi_double_channel *result,
                          const union tgsi_double_channel *operands);

/** Micro-op taking a 64-bit first operand and a 32-bit second operand. */
typedef void (*micro_dop_sop)(union tgsi_double_channel *result,
                              const union tgsi_double_channel *operand0,
                              union tgsi_exec_channel *operand1);

/** Micro-op producing a 64-bit result from a 32-bit operand. */
typedef void (*micro_dop_s)(union tgsi_double_channel *result,
                            const union tgsi_exec_channel *operand);

/** Micro-op producing a 32-bit result from a 64-bit operand. */
typedef void (*micro_sop_d)(union tgsi_exec_channel *result,
                            const union tgsi_double_channel *operand);
3660
3661 static void
fetch_double_channel(struct tgsi_exec_machine * mach,union tgsi_double_channel * chan,const struct tgsi_full_src_register * reg,uint chan_0,uint chan_1)3662 fetch_double_channel(struct tgsi_exec_machine *mach,
3663 union tgsi_double_channel *chan,
3664 const struct tgsi_full_src_register *reg,
3665 uint chan_0,
3666 uint chan_1)
3667 {
3668 union tgsi_exec_channel src[2];
3669 uint i;
3670
3671 fetch_source_d(mach, &src[0], reg, chan_0, TGSI_EXEC_DATA_UINT);
3672 fetch_source_d(mach, &src[1], reg, chan_1, TGSI_EXEC_DATA_UINT);
3673
3674 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3675 chan->u[i][0] = src[0].u[i];
3676 chan->u[i][1] = src[1].u[i];
3677 }
3678 if (reg->Register.Absolute) {
3679 micro_dabs(chan, chan);
3680 }
3681 if (reg->Register.Negate) {
3682 micro_dneg(chan, chan);
3683 }
3684 }
3685
3686 static void
store_double_channel(struct tgsi_exec_machine * mach,const union tgsi_double_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_0,uint chan_1)3687 store_double_channel(struct tgsi_exec_machine *mach,
3688 const union tgsi_double_channel *chan,
3689 const struct tgsi_full_dst_register *reg,
3690 const struct tgsi_full_instruction *inst,
3691 uint chan_0,
3692 uint chan_1)
3693 {
3694 union tgsi_exec_channel dst[2];
3695 uint i;
3696 union tgsi_double_channel temp;
3697 const uint execmask = mach->ExecMask;
3698
3699 if (!inst->Instruction.Saturate) {
3700 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3701 if (execmask & (1 << i)) {
3702 dst[0].u[i] = chan->u[i][0];
3703 dst[1].u[i] = chan->u[i][1];
3704 }
3705 }
3706 else {
3707 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3708 if (execmask & (1 << i)) {
3709 if (chan->d[i] < 0.0)
3710 temp.d[i] = 0.0;
3711 else if (chan->d[i] > 1.0)
3712 temp.d[i] = 1.0;
3713 else
3714 temp.d[i] = chan->d[i];
3715
3716 dst[0].u[i] = temp.u[i][0];
3717 dst[1].u[i] = temp.u[i][1];
3718 }
3719 }
3720
3721 store_dest_double(mach, &dst[0], reg, inst, chan_0, TGSI_EXEC_DATA_UINT);
3722 if (chan_1 != -1)
3723 store_dest_double(mach, &dst[1], reg, inst, chan_1, TGSI_EXEC_DATA_UINT);
3724 }
3725
3726 static void
exec_double_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3727 exec_double_unary(struct tgsi_exec_machine *mach,
3728 const struct tgsi_full_instruction *inst,
3729 micro_dop op)
3730 {
3731 union tgsi_double_channel src;
3732 union tgsi_double_channel dst;
3733
3734 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3735 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3736 op(&dst, &src);
3737 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3738 }
3739 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3740 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3741 op(&dst, &src);
3742 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3743 }
3744 }
3745
3746 static void
exec_double_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op,enum tgsi_exec_datatype dst_datatype)3747 exec_double_binary(struct tgsi_exec_machine *mach,
3748 const struct tgsi_full_instruction *inst,
3749 micro_dop op,
3750 enum tgsi_exec_datatype dst_datatype)
3751 {
3752 union tgsi_double_channel src[2];
3753 union tgsi_double_channel dst;
3754 int first_dest_chan, second_dest_chan;
3755 int wmask;
3756
3757 wmask = inst->Dst[0].Register.WriteMask;
3758 /* these are & because of the way DSLT etc store their destinations */
3759 if (wmask & TGSI_WRITEMASK_XY) {
3760 first_dest_chan = TGSI_CHAN_X;
3761 second_dest_chan = TGSI_CHAN_Y;
3762 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3763 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3764 second_dest_chan = -1;
3765 }
3766
3767 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3768 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3769 op(&dst, src);
3770 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3771 }
3772
3773 if (wmask & TGSI_WRITEMASK_ZW) {
3774 first_dest_chan = TGSI_CHAN_Z;
3775 second_dest_chan = TGSI_CHAN_W;
3776 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3777 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3778 second_dest_chan = -1;
3779 }
3780
3781 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3782 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3783 op(&dst, src);
3784 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3785 }
3786 }
3787
3788 static void
exec_double_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3789 exec_double_trinary(struct tgsi_exec_machine *mach,
3790 const struct tgsi_full_instruction *inst,
3791 micro_dop op)
3792 {
3793 union tgsi_double_channel src[3];
3794 union tgsi_double_channel dst;
3795
3796 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3797 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3798 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3799 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3800 op(&dst, src);
3801 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3802 }
3803 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3804 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3805 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3806 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3807 op(&dst, src);
3808 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3809 }
3810 }
3811
3812 static void
exec_dldexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3813 exec_dldexp(struct tgsi_exec_machine *mach,
3814 const struct tgsi_full_instruction *inst)
3815 {
3816 union tgsi_double_channel src0;
3817 union tgsi_exec_channel src1;
3818 union tgsi_double_channel dst;
3819 int wmask;
3820
3821 wmask = inst->Dst[0].Register.WriteMask;
3822 if (wmask & TGSI_WRITEMASK_XY) {
3823 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3824 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3825 micro_dldexp(&dst, &src0, &src1);
3826 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3827 }
3828
3829 if (wmask & TGSI_WRITEMASK_ZW) {
3830 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3831 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3832 micro_dldexp(&dst, &src0, &src1);
3833 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3834 }
3835 }
3836
3837 static void
exec_dfracexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3838 exec_dfracexp(struct tgsi_exec_machine *mach,
3839 const struct tgsi_full_instruction *inst)
3840 {
3841 union tgsi_double_channel src;
3842 union tgsi_double_channel dst;
3843 union tgsi_exec_channel dst_exp;
3844
3845 if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)) {
3846 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3847 micro_dfracexp(&dst, &dst_exp, &src);
3848 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3849 store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
3850 }
3851 if (((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)) {
3852 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3853 micro_dfracexp(&dst, &dst_exp, &src);
3854 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3855 store_dest(mach, &dst_exp, &inst->Dst[1], inst, ffs(inst->Dst[1].Register.WriteMask) - 1, TGSI_EXEC_DATA_INT);
3856 }
3857 }
3858
3859 static void
exec_arg0_64_arg1_32(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_sop op)3860 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3861 const struct tgsi_full_instruction *inst,
3862 micro_dop_sop op)
3863 {
3864 union tgsi_double_channel src0;
3865 union tgsi_exec_channel src1;
3866 union tgsi_double_channel dst;
3867 int wmask;
3868
3869 wmask = inst->Dst[0].Register.WriteMask;
3870 if (wmask & TGSI_WRITEMASK_XY) {
3871 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3872 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3873 op(&dst, &src0, &src1);
3874 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3875 }
3876
3877 if (wmask & TGSI_WRITEMASK_ZW) {
3878 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3879 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3880 op(&dst, &src0, &src1);
3881 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3882 }
3883 }
3884
3885 static int
get_image_coord_dim(unsigned tgsi_tex)3886 get_image_coord_dim(unsigned tgsi_tex)
3887 {
3888 int dim;
3889 switch (tgsi_tex) {
3890 case TGSI_TEXTURE_BUFFER:
3891 case TGSI_TEXTURE_1D:
3892 dim = 1;
3893 break;
3894 case TGSI_TEXTURE_2D:
3895 case TGSI_TEXTURE_RECT:
3896 case TGSI_TEXTURE_1D_ARRAY:
3897 case TGSI_TEXTURE_2D_MSAA:
3898 dim = 2;
3899 break;
3900 case TGSI_TEXTURE_3D:
3901 case TGSI_TEXTURE_CUBE:
3902 case TGSI_TEXTURE_2D_ARRAY:
3903 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3904 case TGSI_TEXTURE_CUBE_ARRAY:
3905 dim = 3;
3906 break;
3907 default:
3908 assert(!"unknown texture target");
3909 dim = 0;
3910 break;
3911 }
3912
3913 return dim;
3914 }
3915
3916 static int
get_image_coord_sample(unsigned tgsi_tex)3917 get_image_coord_sample(unsigned tgsi_tex)
3918 {
3919 int sample = 0;
3920 switch (tgsi_tex) {
3921 case TGSI_TEXTURE_2D_MSAA:
3922 sample = 3;
3923 break;
3924 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3925 sample = 4;
3926 break;
3927 default:
3928 break;
3929 }
3930 return sample;
3931 }
3932
3933 static void
exec_load_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3934 exec_load_img(struct tgsi_exec_machine *mach,
3935 const struct tgsi_full_instruction *inst)
3936 {
3937 union tgsi_exec_channel r[4], sample_r;
3938 uint unit;
3939 int sample;
3940 int i, j;
3941 int dim;
3942 uint chan;
3943 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3944 struct tgsi_image_params params;
3945 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3946
3947 unit = fetch_sampler_unit(mach, inst, 0);
3948 dim = get_image_coord_dim(inst->Memory.Texture);
3949 sample = get_image_coord_sample(inst->Memory.Texture);
3950 assert(dim <= 3);
3951
3952 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3953 params.unit = unit;
3954 params.tgsi_tex_instr = inst->Memory.Texture;
3955 params.format = inst->Memory.Format;
3956
3957 for (i = 0; i < dim; i++) {
3958 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3959 }
3960
3961 if (sample)
3962 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3963
3964 mach->Image->load(mach->Image, ¶ms,
3965 r[0].i, r[1].i, r[2].i, sample_r.i,
3966 rgba);
3967 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3968 r[0].f[j] = rgba[0][j];
3969 r[1].f[j] = rgba[1][j];
3970 r[2].f[j] = rgba[2][j];
3971 r[3].f[j] = rgba[3][j];
3972 }
3973 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3974 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3975 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
3976 }
3977 }
3978 }
3979
3980 static void
exec_load_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3981 exec_load_buf(struct tgsi_exec_machine *mach,
3982 const struct tgsi_full_instruction *inst)
3983 {
3984 union tgsi_exec_channel r[4];
3985 uint unit;
3986 int j;
3987 uint chan;
3988 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3989 struct tgsi_buffer_params params;
3990 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
3991
3992 unit = fetch_sampler_unit(mach, inst, 0);
3993
3994 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
3995 params.unit = unit;
3996 IFETCH(&r[0], 1, TGSI_CHAN_X);
3997
3998 mach->Buffer->load(mach->Buffer, ¶ms,
3999 r[0].i, rgba);
4000 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4001 r[0].f[j] = rgba[0][j];
4002 r[1].f[j] = rgba[1][j];
4003 r[2].f[j] = rgba[2][j];
4004 r[3].f[j] = rgba[3][j];
4005 }
4006 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4007 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4008 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4009 }
4010 }
4011 }
4012
4013 static void
exec_load_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4014 exec_load_mem(struct tgsi_exec_machine *mach,
4015 const struct tgsi_full_instruction *inst)
4016 {
4017 union tgsi_exec_channel r[4];
4018 uint chan;
4019 char *ptr = mach->LocalMem;
4020 uint32_t offset;
4021 int j;
4022
4023 IFETCH(&r[0], 1, TGSI_CHAN_X);
4024 if (r[0].u[0] >= mach->LocalMemSize)
4025 return;
4026
4027 offset = r[0].u[0];
4028 ptr += offset;
4029
4030 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4031 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4032 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4033 memcpy(&r[chan].u[j], ptr + (4 * chan), 4);
4034 }
4035 }
4036 }
4037
4038 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4039 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4040 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4041 }
4042 }
4043 }
4044
4045 static void
exec_load(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4046 exec_load(struct tgsi_exec_machine *mach,
4047 const struct tgsi_full_instruction *inst)
4048 {
4049 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4050 exec_load_img(mach, inst);
4051 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4052 exec_load_buf(mach, inst);
4053 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4054 exec_load_mem(mach, inst);
4055 }
4056
4057 static void
exec_store_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4058 exec_store_img(struct tgsi_exec_machine *mach,
4059 const struct tgsi_full_instruction *inst)
4060 {
4061 union tgsi_exec_channel r[3], sample_r;
4062 union tgsi_exec_channel value[4];
4063 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4064 struct tgsi_image_params params;
4065 int dim;
4066 int sample;
4067 int i, j;
4068 uint unit;
4069 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4070 unit = inst->Dst[0].Register.Index;
4071 dim = get_image_coord_dim(inst->Memory.Texture);
4072 sample = get_image_coord_sample(inst->Memory.Texture);
4073 assert(dim <= 3);
4074
4075 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4076 params.unit = unit;
4077 params.tgsi_tex_instr = inst->Memory.Texture;
4078 params.format = inst->Memory.Format;
4079
4080 for (i = 0; i < dim; i++) {
4081 IFETCH(&r[i], 0, TGSI_CHAN_X + i);
4082 }
4083
4084 for (i = 0; i < 4; i++) {
4085 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4086 }
4087 if (sample)
4088 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
4089
4090 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4091 rgba[0][j] = value[0].f[j];
4092 rgba[1][j] = value[1].f[j];
4093 rgba[2][j] = value[2].f[j];
4094 rgba[3][j] = value[3].f[j];
4095 }
4096
4097 mach->Image->store(mach->Image, ¶ms,
4098 r[0].i, r[1].i, r[2].i, sample_r.i,
4099 rgba);
4100 }
4101
4102 static void
exec_store_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4103 exec_store_buf(struct tgsi_exec_machine *mach,
4104 const struct tgsi_full_instruction *inst)
4105 {
4106 union tgsi_exec_channel r[3];
4107 union tgsi_exec_channel value[4];
4108 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4109 struct tgsi_buffer_params params;
4110 int i, j;
4111 uint unit;
4112 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4113
4114 unit = inst->Dst[0].Register.Index;
4115
4116 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4117 params.unit = unit;
4118 params.writemask = inst->Dst[0].Register.WriteMask;
4119
4120 IFETCH(&r[0], 0, TGSI_CHAN_X);
4121 for (i = 0; i < 4; i++) {
4122 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4123 }
4124
4125 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4126 rgba[0][j] = value[0].f[j];
4127 rgba[1][j] = value[1].f[j];
4128 rgba[2][j] = value[2].f[j];
4129 rgba[3][j] = value[3].f[j];
4130 }
4131
4132 mach->Buffer->store(mach->Buffer, ¶ms,
4133 r[0].i,
4134 rgba);
4135 }
4136
4137 static void
exec_store_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4138 exec_store_mem(struct tgsi_exec_machine *mach,
4139 const struct tgsi_full_instruction *inst)
4140 {
4141 union tgsi_exec_channel r[3];
4142 union tgsi_exec_channel value[4];
4143 uint i, chan;
4144 char *ptr = mach->LocalMem;
4145 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4146 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4147
4148 IFETCH(&r[0], 0, TGSI_CHAN_X);
4149
4150 for (i = 0; i < 4; i++) {
4151 FETCH(&value[i], 1, TGSI_CHAN_X + i);
4152 }
4153
4154 if (r[0].u[0] >= mach->LocalMemSize)
4155 return;
4156 ptr += r[0].u[0];
4157
4158 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4159 if (execmask & (1 << i)) {
4160 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4161 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4162 memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
4163 }
4164 }
4165 }
4166 }
4167 }
4168
4169 static void
exec_store(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4170 exec_store(struct tgsi_exec_machine *mach,
4171 const struct tgsi_full_instruction *inst)
4172 {
4173 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
4174 exec_store_img(mach, inst);
4175 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
4176 exec_store_buf(mach, inst);
4177 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
4178 exec_store_mem(mach, inst);
4179 }
4180
4181 static void
exec_atomop_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4182 exec_atomop_img(struct tgsi_exec_machine *mach,
4183 const struct tgsi_full_instruction *inst)
4184 {
4185 union tgsi_exec_channel r[4], sample_r;
4186 union tgsi_exec_channel value[4], value2[4];
4187 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4188 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4189 struct tgsi_image_params params;
4190 int dim;
4191 int sample;
4192 int i, j;
4193 uint unit, chan;
4194 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4195 unit = fetch_sampler_unit(mach, inst, 0);
4196 dim = get_image_coord_dim(inst->Memory.Texture);
4197 sample = get_image_coord_sample(inst->Memory.Texture);
4198 assert(dim <= 3);
4199
4200 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4201 params.unit = unit;
4202 params.tgsi_tex_instr = inst->Memory.Texture;
4203 params.format = inst->Memory.Format;
4204
4205 for (i = 0; i < dim; i++) {
4206 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
4207 }
4208
4209 for (i = 0; i < 4; i++) {
4210 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4211 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4212 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4213 }
4214 if (sample)
4215 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
4216
4217 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4218 rgba[0][j] = value[0].f[j];
4219 rgba[1][j] = value[1].f[j];
4220 rgba[2][j] = value[2].f[j];
4221 rgba[3][j] = value[3].f[j];
4222 }
4223 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4224 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4225 rgba2[0][j] = value2[0].f[j];
4226 rgba2[1][j] = value2[1].f[j];
4227 rgba2[2][j] = value2[2].f[j];
4228 rgba2[3][j] = value2[3].f[j];
4229 }
4230 }
4231
4232 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode,
4233 r[0].i, r[1].i, r[2].i, sample_r.i,
4234 rgba, rgba2);
4235
4236 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4237 r[0].f[j] = rgba[0][j];
4238 r[1].f[j] = rgba[1][j];
4239 r[2].f[j] = rgba[2][j];
4240 r[3].f[j] = rgba[3][j];
4241 }
4242 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4243 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4244 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4245 }
4246 }
4247 }
4248
4249 static void
exec_atomop_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4250 exec_atomop_buf(struct tgsi_exec_machine *mach,
4251 const struct tgsi_full_instruction *inst)
4252 {
4253 union tgsi_exec_channel r[4];
4254 union tgsi_exec_channel value[4], value2[4];
4255 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4256 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
4257 struct tgsi_buffer_params params;
4258 int i, j;
4259 uint unit, chan;
4260 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4261
4262 unit = fetch_sampler_unit(mach, inst, 0);
4263
4264 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4265 params.unit = unit;
4266 params.writemask = inst->Dst[0].Register.WriteMask;
4267
4268 IFETCH(&r[0], 1, TGSI_CHAN_X);
4269
4270 for (i = 0; i < 4; i++) {
4271 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4272 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4273 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4274 }
4275
4276 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4277 rgba[0][j] = value[0].f[j];
4278 rgba[1][j] = value[1].f[j];
4279 rgba[2][j] = value[2].f[j];
4280 rgba[3][j] = value[3].f[j];
4281 }
4282 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4283 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4284 rgba2[0][j] = value2[0].f[j];
4285 rgba2[1][j] = value2[1].f[j];
4286 rgba2[2][j] = value2[2].f[j];
4287 rgba2[3][j] = value2[3].f[j];
4288 }
4289 }
4290
4291 mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode,
4292 r[0].i,
4293 rgba, rgba2);
4294
4295 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4296 r[0].f[j] = rgba[0][j];
4297 r[1].f[j] = rgba[1][j];
4298 r[2].f[j] = rgba[2][j];
4299 r[3].f[j] = rgba[3][j];
4300 }
4301 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4302 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4303 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4304 }
4305 }
4306 }
4307
4308 static void
exec_atomop_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4309 exec_atomop_mem(struct tgsi_exec_machine *mach,
4310 const struct tgsi_full_instruction *inst)
4311 {
4312 union tgsi_exec_channel r[4];
4313 union tgsi_exec_channel value[4], value2[4];
4314 char *ptr = mach->LocalMem;
4315 uint32_t val;
4316 uint chan, i;
4317 uint32_t offset;
4318 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4319 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4320 IFETCH(&r[0], 1, TGSI_CHAN_X);
4321
4322 if (r[0].u[0] >= mach->LocalMemSize)
4323 return;
4324
4325 offset = r[0].u[0];
4326 ptr += offset;
4327 for (i = 0; i < 4; i++) {
4328 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4329 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4330 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4331 }
4332
4333 memcpy(&r[0].u[0], ptr, 4);
4334 val = r[0].u[0];
4335 switch (inst->Instruction.Opcode) {
4336 case TGSI_OPCODE_ATOMUADD:
4337 val += value[0].u[0];
4338 break;
4339 case TGSI_OPCODE_ATOMXOR:
4340 val ^= value[0].u[0];
4341 break;
4342 case TGSI_OPCODE_ATOMOR:
4343 val |= value[0].u[0];
4344 break;
4345 case TGSI_OPCODE_ATOMAND:
4346 val &= value[0].u[0];
4347 break;
4348 case TGSI_OPCODE_ATOMUMIN:
4349 val = MIN2(val, value[0].u[0]);
4350 break;
4351 case TGSI_OPCODE_ATOMUMAX:
4352 val = MAX2(val, value[0].u[0]);
4353 break;
4354 case TGSI_OPCODE_ATOMIMIN:
4355 val = MIN2(r[0].i[0], value[0].i[0]);
4356 break;
4357 case TGSI_OPCODE_ATOMIMAX:
4358 val = MAX2(r[0].i[0], value[0].i[0]);
4359 break;
4360 case TGSI_OPCODE_ATOMXCHG:
4361 val = value[0].i[0];
4362 break;
4363 case TGSI_OPCODE_ATOMCAS:
4364 if (val == value[0].u[0])
4365 val = value2[0].u[0];
4366 break;
4367 default:
4368 break;
4369 }
4370 for (i = 0; i < TGSI_QUAD_SIZE; i++)
4371 if (execmask & (1 << i))
4372 memcpy(ptr, &val, 4);
4373
4374 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4375 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4376 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT);
4377 }
4378 }
4379 }
4380
4381 static void
exec_atomop(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4382 exec_atomop(struct tgsi_exec_machine *mach,
4383 const struct tgsi_full_instruction *inst)
4384 {
4385 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4386 exec_atomop_img(mach, inst);
4387 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER)
4388 exec_atomop_buf(mach, inst);
4389 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY)
4390 exec_atomop_mem(mach, inst);
4391 }
4392
4393 static void
exec_resq_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4394 exec_resq_img(struct tgsi_exec_machine *mach,
4395 const struct tgsi_full_instruction *inst)
4396 {
4397 int result[4];
4398 union tgsi_exec_channel r[4];
4399 uint unit;
4400 int i, chan, j;
4401 struct tgsi_image_params params;
4402 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4403
4404 unit = fetch_sampler_unit(mach, inst, 0);
4405
4406 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4407 params.unit = unit;
4408 params.tgsi_tex_instr = inst->Memory.Texture;
4409 params.format = inst->Memory.Format;
4410
4411 mach->Image->get_dims(mach->Image, ¶ms, result);
4412
4413 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4414 for (j = 0; j < 4; j++) {
4415 r[j].i[i] = result[j];
4416 }
4417 }
4418
4419 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4420 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4421 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4422 TGSI_EXEC_DATA_INT);
4423 }
4424 }
4425 }
4426
4427 static void
exec_resq_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4428 exec_resq_buf(struct tgsi_exec_machine *mach,
4429 const struct tgsi_full_instruction *inst)
4430 {
4431 int result;
4432 union tgsi_exec_channel r[4];
4433 uint unit;
4434 int i, chan;
4435 struct tgsi_buffer_params params;
4436 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
4437
4438 unit = fetch_sampler_unit(mach, inst, 0);
4439
4440 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask;
4441 params.unit = unit;
4442
4443 mach->Buffer->get_dims(mach->Buffer, ¶ms, &result);
4444
4445 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4446 r[0].i[i] = result;
4447 }
4448
4449 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4450 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4451 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan,
4452 TGSI_EXEC_DATA_INT);
4453 }
4454 }
4455 }
4456
4457 static void
exec_resq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4458 exec_resq(struct tgsi_exec_machine *mach,
4459 const struct tgsi_full_instruction *inst)
4460 {
4461 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4462 exec_resq_img(mach, inst);
4463 else
4464 exec_resq_buf(mach, inst);
4465 }
4466
4467 static void
micro_f2u64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4468 micro_f2u64(union tgsi_double_channel *dst,
4469 const union tgsi_exec_channel *src)
4470 {
4471 dst->u64[0] = (uint64_t)src->f[0];
4472 dst->u64[1] = (uint64_t)src->f[1];
4473 dst->u64[2] = (uint64_t)src->f[2];
4474 dst->u64[3] = (uint64_t)src->f[3];
4475 }
4476
4477 static void
micro_f2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4478 micro_f2i64(union tgsi_double_channel *dst,
4479 const union tgsi_exec_channel *src)
4480 {
4481 dst->i64[0] = (int64_t)src->f[0];
4482 dst->i64[1] = (int64_t)src->f[1];
4483 dst->i64[2] = (int64_t)src->f[2];
4484 dst->i64[3] = (int64_t)src->f[3];
4485 }
4486
4487 static void
micro_u2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4488 micro_u2i64(union tgsi_double_channel *dst,
4489 const union tgsi_exec_channel *src)
4490 {
4491 dst->u64[0] = (uint64_t)src->u[0];
4492 dst->u64[1] = (uint64_t)src->u[1];
4493 dst->u64[2] = (uint64_t)src->u[2];
4494 dst->u64[3] = (uint64_t)src->u[3];
4495 }
4496
4497 static void
micro_i2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4498 micro_i2i64(union tgsi_double_channel *dst,
4499 const union tgsi_exec_channel *src)
4500 {
4501 dst->i64[0] = (int64_t)src->i[0];
4502 dst->i64[1] = (int64_t)src->i[1];
4503 dst->i64[2] = (int64_t)src->i[2];
4504 dst->i64[3] = (int64_t)src->i[3];
4505 }
4506
4507 static void
micro_d2u64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4508 micro_d2u64(union tgsi_double_channel *dst,
4509 const union tgsi_double_channel *src)
4510 {
4511 dst->u64[0] = (uint64_t)src->d[0];
4512 dst->u64[1] = (uint64_t)src->d[1];
4513 dst->u64[2] = (uint64_t)src->d[2];
4514 dst->u64[3] = (uint64_t)src->d[3];
4515 }
4516
4517 static void
micro_d2i64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4518 micro_d2i64(union tgsi_double_channel *dst,
4519 const union tgsi_double_channel *src)
4520 {
4521 dst->i64[0] = (int64_t)src->d[0];
4522 dst->i64[1] = (int64_t)src->d[1];
4523 dst->i64[2] = (int64_t)src->d[2];
4524 dst->i64[3] = (int64_t)src->d[3];
4525 }
4526
4527 static void
micro_u642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4528 micro_u642d(union tgsi_double_channel *dst,
4529 const union tgsi_double_channel *src)
4530 {
4531 dst->d[0] = (double)src->u64[0];
4532 dst->d[1] = (double)src->u64[1];
4533 dst->d[2] = (double)src->u64[2];
4534 dst->d[3] = (double)src->u64[3];
4535 }
4536
4537 static void
micro_i642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4538 micro_i642d(union tgsi_double_channel *dst,
4539 const union tgsi_double_channel *src)
4540 {
4541 dst->d[0] = (double)src->i64[0];
4542 dst->d[1] = (double)src->i64[1];
4543 dst->d[2] = (double)src->i64[2];
4544 dst->d[3] = (double)src->i64[3];
4545 }
4546
4547 static void
micro_u642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4548 micro_u642f(union tgsi_exec_channel *dst,
4549 const union tgsi_double_channel *src)
4550 {
4551 dst->f[0] = (float)src->u64[0];
4552 dst->f[1] = (float)src->u64[1];
4553 dst->f[2] = (float)src->u64[2];
4554 dst->f[3] = (float)src->u64[3];
4555 }
4556
4557 static void
micro_i642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4558 micro_i642f(union tgsi_exec_channel *dst,
4559 const union tgsi_double_channel *src)
4560 {
4561 dst->f[0] = (float)src->i64[0];
4562 dst->f[1] = (float)src->i64[1];
4563 dst->f[2] = (float)src->i64[2];
4564 dst->f[3] = (float)src->i64[3];
4565 }
4566
4567 static void
exec_t_2_64(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_s op,enum tgsi_exec_datatype src_datatype)4568 exec_t_2_64(struct tgsi_exec_machine *mach,
4569 const struct tgsi_full_instruction *inst,
4570 micro_dop_s op,
4571 enum tgsi_exec_datatype src_datatype)
4572 {
4573 union tgsi_exec_channel src;
4574 union tgsi_double_channel dst;
4575
4576 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4577 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4578 op(&dst, &src);
4579 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4580 }
4581 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4582 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4583 op(&dst, &src);
4584 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4585 }
4586 }
4587
4588 static void
exec_64_2_t(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_sop_d op,enum tgsi_exec_datatype dst_datatype)4589 exec_64_2_t(struct tgsi_exec_machine *mach,
4590 const struct tgsi_full_instruction *inst,
4591 micro_sop_d op,
4592 enum tgsi_exec_datatype dst_datatype)
4593 {
4594 union tgsi_double_channel src;
4595 union tgsi_exec_channel dst;
4596 int wm = inst->Dst[0].Register.WriteMask;
4597 int i;
4598 int bit;
4599 for (i = 0; i < 2; i++) {
4600 bit = ffs(wm);
4601 if (bit) {
4602 wm &= ~(1 << (bit - 1));
4603 if (i == 0)
4604 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4605 else
4606 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4607 op(&dst, &src);
4608 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype);
4609 }
4610 }
4611 }
4612
4613 static void
micro_i2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4614 micro_i2f(union tgsi_exec_channel *dst,
4615 const union tgsi_exec_channel *src)
4616 {
4617 dst->f[0] = (float)src->i[0];
4618 dst->f[1] = (float)src->i[1];
4619 dst->f[2] = (float)src->i[2];
4620 dst->f[3] = (float)src->i[3];
4621 }
4622
4623 static void
micro_not(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4624 micro_not(union tgsi_exec_channel *dst,
4625 const union tgsi_exec_channel *src)
4626 {
4627 dst->u[0] = ~src->u[0];
4628 dst->u[1] = ~src->u[1];
4629 dst->u[2] = ~src->u[2];
4630 dst->u[3] = ~src->u[3];
4631 }
4632
4633 static void
micro_shl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4634 micro_shl(union tgsi_exec_channel *dst,
4635 const union tgsi_exec_channel *src0,
4636 const union tgsi_exec_channel *src1)
4637 {
4638 unsigned masked_count;
4639 masked_count = src1->u[0] & 0x1f;
4640 dst->u[0] = src0->u[0] << masked_count;
4641 masked_count = src1->u[1] & 0x1f;
4642 dst->u[1] = src0->u[1] << masked_count;
4643 masked_count = src1->u[2] & 0x1f;
4644 dst->u[2] = src0->u[2] << masked_count;
4645 masked_count = src1->u[3] & 0x1f;
4646 dst->u[3] = src0->u[3] << masked_count;
4647 }
4648
4649 static void
micro_and(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4650 micro_and(union tgsi_exec_channel *dst,
4651 const union tgsi_exec_channel *src0,
4652 const union tgsi_exec_channel *src1)
4653 {
4654 dst->u[0] = src0->u[0] & src1->u[0];
4655 dst->u[1] = src0->u[1] & src1->u[1];
4656 dst->u[2] = src0->u[2] & src1->u[2];
4657 dst->u[3] = src0->u[3] & src1->u[3];
4658 }
4659
4660 static void
micro_or(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4661 micro_or(union tgsi_exec_channel *dst,
4662 const union tgsi_exec_channel *src0,
4663 const union tgsi_exec_channel *src1)
4664 {
4665 dst->u[0] = src0->u[0] | src1->u[0];
4666 dst->u[1] = src0->u[1] | src1->u[1];
4667 dst->u[2] = src0->u[2] | src1->u[2];
4668 dst->u[3] = src0->u[3] | src1->u[3];
4669 }
4670
4671 static void
micro_xor(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4672 micro_xor(union tgsi_exec_channel *dst,
4673 const union tgsi_exec_channel *src0,
4674 const union tgsi_exec_channel *src1)
4675 {
4676 dst->u[0] = src0->u[0] ^ src1->u[0];
4677 dst->u[1] = src0->u[1] ^ src1->u[1];
4678 dst->u[2] = src0->u[2] ^ src1->u[2];
4679 dst->u[3] = src0->u[3] ^ src1->u[3];
4680 }
4681
4682 static void
micro_mod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4683 micro_mod(union tgsi_exec_channel *dst,
4684 const union tgsi_exec_channel *src0,
4685 const union tgsi_exec_channel *src1)
4686 {
4687 dst->i[0] = src0->i[0] % src1->i[0];
4688 dst->i[1] = src0->i[1] % src1->i[1];
4689 dst->i[2] = src0->i[2] % src1->i[2];
4690 dst->i[3] = src0->i[3] % src1->i[3];
4691 }
4692
4693 static void
micro_f2i(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4694 micro_f2i(union tgsi_exec_channel *dst,
4695 const union tgsi_exec_channel *src)
4696 {
4697 dst->i[0] = (int)src->f[0];
4698 dst->i[1] = (int)src->f[1];
4699 dst->i[2] = (int)src->f[2];
4700 dst->i[3] = (int)src->f[3];
4701 }
4702
4703 static void
micro_fseq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4704 micro_fseq(union tgsi_exec_channel *dst,
4705 const union tgsi_exec_channel *src0,
4706 const union tgsi_exec_channel *src1)
4707 {
4708 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4709 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4710 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4711 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4712 }
4713
4714 static void
micro_fsge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4715 micro_fsge(union tgsi_exec_channel *dst,
4716 const union tgsi_exec_channel *src0,
4717 const union tgsi_exec_channel *src1)
4718 {
4719 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4720 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4721 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4722 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4723 }
4724
4725 static void
micro_fslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4726 micro_fslt(union tgsi_exec_channel *dst,
4727 const union tgsi_exec_channel *src0,
4728 const union tgsi_exec_channel *src1)
4729 {
4730 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4731 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4732 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4733 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4734 }
4735
4736 static void
micro_fsne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4737 micro_fsne(union tgsi_exec_channel *dst,
4738 const union tgsi_exec_channel *src0,
4739 const union tgsi_exec_channel *src1)
4740 {
4741 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4742 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4743 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4744 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4745 }
4746
4747 static void
micro_idiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4748 micro_idiv(union tgsi_exec_channel *dst,
4749 const union tgsi_exec_channel *src0,
4750 const union tgsi_exec_channel *src1)
4751 {
4752 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4753 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4754 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4755 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4756 }
4757
4758 static void
micro_imax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4759 micro_imax(union tgsi_exec_channel *dst,
4760 const union tgsi_exec_channel *src0,
4761 const union tgsi_exec_channel *src1)
4762 {
4763 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4764 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4765 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4766 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4767 }
4768
4769 static void
micro_imin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4770 micro_imin(union tgsi_exec_channel *dst,
4771 const union tgsi_exec_channel *src0,
4772 const union tgsi_exec_channel *src1)
4773 {
4774 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4775 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4776 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4777 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4778 }
4779
4780 static void
micro_isge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4781 micro_isge(union tgsi_exec_channel *dst,
4782 const union tgsi_exec_channel *src0,
4783 const union tgsi_exec_channel *src1)
4784 {
4785 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4786 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4787 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4788 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4789 }
4790
4791 static void
micro_ishr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4792 micro_ishr(union tgsi_exec_channel *dst,
4793 const union tgsi_exec_channel *src0,
4794 const union tgsi_exec_channel *src1)
4795 {
4796 unsigned masked_count;
4797 masked_count = src1->i[0] & 0x1f;
4798 dst->i[0] = src0->i[0] >> masked_count;
4799 masked_count = src1->i[1] & 0x1f;
4800 dst->i[1] = src0->i[1] >> masked_count;
4801 masked_count = src1->i[2] & 0x1f;
4802 dst->i[2] = src0->i[2] >> masked_count;
4803 masked_count = src1->i[3] & 0x1f;
4804 dst->i[3] = src0->i[3] >> masked_count;
4805 }
4806
4807 static void
micro_islt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4808 micro_islt(union tgsi_exec_channel *dst,
4809 const union tgsi_exec_channel *src0,
4810 const union tgsi_exec_channel *src1)
4811 {
4812 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4813 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4814 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4815 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4816 }
4817
4818 static void
micro_f2u(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4819 micro_f2u(union tgsi_exec_channel *dst,
4820 const union tgsi_exec_channel *src)
4821 {
4822 dst->u[0] = (uint)src->f[0];
4823 dst->u[1] = (uint)src->f[1];
4824 dst->u[2] = (uint)src->f[2];
4825 dst->u[3] = (uint)src->f[3];
4826 }
4827
4828 static void
micro_u2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4829 micro_u2f(union tgsi_exec_channel *dst,
4830 const union tgsi_exec_channel *src)
4831 {
4832 dst->f[0] = (float)src->u[0];
4833 dst->f[1] = (float)src->u[1];
4834 dst->f[2] = (float)src->u[2];
4835 dst->f[3] = (float)src->u[3];
4836 }
4837
4838 static void
micro_uadd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4839 micro_uadd(union tgsi_exec_channel *dst,
4840 const union tgsi_exec_channel *src0,
4841 const union tgsi_exec_channel *src1)
4842 {
4843 dst->u[0] = src0->u[0] + src1->u[0];
4844 dst->u[1] = src0->u[1] + src1->u[1];
4845 dst->u[2] = src0->u[2] + src1->u[2];
4846 dst->u[3] = src0->u[3] + src1->u[3];
4847 }
4848
4849 static void
micro_udiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4850 micro_udiv(union tgsi_exec_channel *dst,
4851 const union tgsi_exec_channel *src0,
4852 const union tgsi_exec_channel *src1)
4853 {
4854 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4855 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4856 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4857 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4858 }
4859
4860 static void
micro_umad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4861 micro_umad(union tgsi_exec_channel *dst,
4862 const union tgsi_exec_channel *src0,
4863 const union tgsi_exec_channel *src1,
4864 const union tgsi_exec_channel *src2)
4865 {
4866 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4867 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4868 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4869 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4870 }
4871
4872 static void
micro_umax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4873 micro_umax(union tgsi_exec_channel *dst,
4874 const union tgsi_exec_channel *src0,
4875 const union tgsi_exec_channel *src1)
4876 {
4877 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4878 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4879 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4880 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4881 }
4882
4883 static void
micro_umin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4884 micro_umin(union tgsi_exec_channel *dst,
4885 const union tgsi_exec_channel *src0,
4886 const union tgsi_exec_channel *src1)
4887 {
4888 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4889 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4890 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4891 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4892 }
4893
4894 static void
micro_umod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4895 micro_umod(union tgsi_exec_channel *dst,
4896 const union tgsi_exec_channel *src0,
4897 const union tgsi_exec_channel *src1)
4898 {
4899 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4900 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4901 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4902 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4903 }
4904
4905 static void
micro_umul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4906 micro_umul(union tgsi_exec_channel *dst,
4907 const union tgsi_exec_channel *src0,
4908 const union tgsi_exec_channel *src1)
4909 {
4910 dst->u[0] = src0->u[0] * src1->u[0];
4911 dst->u[1] = src0->u[1] * src1->u[1];
4912 dst->u[2] = src0->u[2] * src1->u[2];
4913 dst->u[3] = src0->u[3] * src1->u[3];
4914 }
4915
4916 static void
micro_imul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4917 micro_imul_hi(union tgsi_exec_channel *dst,
4918 const union tgsi_exec_channel *src0,
4919 const union tgsi_exec_channel *src1)
4920 {
4921 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4922 dst->i[0] = I64M(src0->i[0], src1->i[0]);
4923 dst->i[1] = I64M(src0->i[1], src1->i[1]);
4924 dst->i[2] = I64M(src0->i[2], src1->i[2]);
4925 dst->i[3] = I64M(src0->i[3], src1->i[3]);
4926 #undef I64M
4927 }
4928
4929 static void
micro_umul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4930 micro_umul_hi(union tgsi_exec_channel *dst,
4931 const union tgsi_exec_channel *src0,
4932 const union tgsi_exec_channel *src1)
4933 {
4934 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4935 dst->u[0] = U64M(src0->u[0], src1->u[0]);
4936 dst->u[1] = U64M(src0->u[1], src1->u[1]);
4937 dst->u[2] = U64M(src0->u[2], src1->u[2]);
4938 dst->u[3] = U64M(src0->u[3], src1->u[3]);
4939 #undef U64M
4940 }
4941
4942 static void
micro_useq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4943 micro_useq(union tgsi_exec_channel *dst,
4944 const union tgsi_exec_channel *src0,
4945 const union tgsi_exec_channel *src1)
4946 {
4947 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4948 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4949 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4950 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4951 }
4952
4953 static void
micro_usge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4954 micro_usge(union tgsi_exec_channel *dst,
4955 const union tgsi_exec_channel *src0,
4956 const union tgsi_exec_channel *src1)
4957 {
4958 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4959 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4960 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4961 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4962 }
4963
4964 static void
micro_ushr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4965 micro_ushr(union tgsi_exec_channel *dst,
4966 const union tgsi_exec_channel *src0,
4967 const union tgsi_exec_channel *src1)
4968 {
4969 unsigned masked_count;
4970 masked_count = src1->u[0] & 0x1f;
4971 dst->u[0] = src0->u[0] >> masked_count;
4972 masked_count = src1->u[1] & 0x1f;
4973 dst->u[1] = src0->u[1] >> masked_count;
4974 masked_count = src1->u[2] & 0x1f;
4975 dst->u[2] = src0->u[2] >> masked_count;
4976 masked_count = src1->u[3] & 0x1f;
4977 dst->u[3] = src0->u[3] >> masked_count;
4978 }
4979
4980 static void
micro_uslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4981 micro_uslt(union tgsi_exec_channel *dst,
4982 const union tgsi_exec_channel *src0,
4983 const union tgsi_exec_channel *src1)
4984 {
4985 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4986 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4987 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4988 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4989 }
4990
4991 static void
micro_usne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4992 micro_usne(union tgsi_exec_channel *dst,
4993 const union tgsi_exec_channel *src0,
4994 const union tgsi_exec_channel *src1)
4995 {
4996 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4997 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4998 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4999 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
5000 }
5001
5002 static void
micro_uarl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5003 micro_uarl(union tgsi_exec_channel *dst,
5004 const union tgsi_exec_channel *src)
5005 {
5006 dst->i[0] = src->u[0];
5007 dst->i[1] = src->u[1];
5008 dst->i[2] = src->u[2];
5009 dst->i[3] = src->u[3];
5010 }
5011
5012 static void
micro_ucmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5013 micro_ucmp(union tgsi_exec_channel *dst,
5014 const union tgsi_exec_channel *src0,
5015 const union tgsi_exec_channel *src1,
5016 const union tgsi_exec_channel *src2)
5017 {
5018 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
5019 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
5020 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
5021 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
5022 }
5023
5024 /**
5025 * Signed bitfield extract (i.e. sign-extend the extracted bits)
5026 */
5027 static void
micro_ibfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5028 micro_ibfe(union tgsi_exec_channel *dst,
5029 const union tgsi_exec_channel *src0,
5030 const union tgsi_exec_channel *src1,
5031 const union tgsi_exec_channel *src2)
5032 {
5033 int i;
5034 for (i = 0; i < 4; i++) {
5035 int width = src2->i[i] & 0x1f;
5036 int offset = src1->i[i] & 0x1f;
5037 if (width == 0)
5038 dst->i[i] = 0;
5039 else if (width + offset < 32)
5040 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
5041 else
5042 dst->i[i] = src0->i[i] >> offset;
5043 }
5044 }
5045
5046 /**
5047 * Unsigned bitfield extract
5048 */
5049 static void
micro_ubfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)5050 micro_ubfe(union tgsi_exec_channel *dst,
5051 const union tgsi_exec_channel *src0,
5052 const union tgsi_exec_channel *src1,
5053 const union tgsi_exec_channel *src2)
5054 {
5055 int i;
5056 for (i = 0; i < 4; i++) {
5057 int width = src2->u[i] & 0x1f;
5058 int offset = src1->u[i] & 0x1f;
5059 if (width == 0)
5060 dst->u[i] = 0;
5061 else if (width + offset < 32)
5062 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
5063 else
5064 dst->u[i] = src0->u[i] >> offset;
5065 }
5066 }
5067
5068 /**
5069 * Bitfield insert: copy low bits from src1 into a region of src0.
5070 */
5071 static void
micro_bfi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)5072 micro_bfi(union tgsi_exec_channel *dst,
5073 const union tgsi_exec_channel *src0,
5074 const union tgsi_exec_channel *src1,
5075 const union tgsi_exec_channel *src2,
5076 const union tgsi_exec_channel *src3)
5077 {
5078 int i;
5079 for (i = 0; i < 4; i++) {
5080 int width = src3->u[i] & 0x1f;
5081 int offset = src2->u[i] & 0x1f;
5082 int bitmask = ((1 << width) - 1) << offset;
5083 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
5084 }
5085 }
5086
5087 static void
micro_brev(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5088 micro_brev(union tgsi_exec_channel *dst,
5089 const union tgsi_exec_channel *src)
5090 {
5091 dst->u[0] = util_bitreverse(src->u[0]);
5092 dst->u[1] = util_bitreverse(src->u[1]);
5093 dst->u[2] = util_bitreverse(src->u[2]);
5094 dst->u[3] = util_bitreverse(src->u[3]);
5095 }
5096
5097 static void
micro_popc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5098 micro_popc(union tgsi_exec_channel *dst,
5099 const union tgsi_exec_channel *src)
5100 {
5101 dst->u[0] = util_bitcount(src->u[0]);
5102 dst->u[1] = util_bitcount(src->u[1]);
5103 dst->u[2] = util_bitcount(src->u[2]);
5104 dst->u[3] = util_bitcount(src->u[3]);
5105 }
5106
5107 static void
micro_lsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5108 micro_lsb(union tgsi_exec_channel *dst,
5109 const union tgsi_exec_channel *src)
5110 {
5111 dst->i[0] = ffs(src->u[0]) - 1;
5112 dst->i[1] = ffs(src->u[1]) - 1;
5113 dst->i[2] = ffs(src->u[2]) - 1;
5114 dst->i[3] = ffs(src->u[3]) - 1;
5115 }
5116
5117 static void
micro_imsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5118 micro_imsb(union tgsi_exec_channel *dst,
5119 const union tgsi_exec_channel *src)
5120 {
5121 dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
5122 dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
5123 dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
5124 dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
5125 }
5126
5127 static void
micro_umsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)5128 micro_umsb(union tgsi_exec_channel *dst,
5129 const union tgsi_exec_channel *src)
5130 {
5131 dst->i[0] = util_last_bit(src->u[0]) - 1;
5132 dst->i[1] = util_last_bit(src->u[1]) - 1;
5133 dst->i[2] = util_last_bit(src->u[2]) - 1;
5134 dst->i[3] = util_last_bit(src->u[3]) - 1;
5135 }
5136
5137 /**
5138 * Execute a TGSI instruction.
5139 * Returns TRUE if a barrier instruction is hit,
5140 * otherwise FALSE.
5141 */
5142 static boolean
exec_instruction(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int * pc)5143 exec_instruction(
5144 struct tgsi_exec_machine *mach,
5145 const struct tgsi_full_instruction *inst,
5146 int *pc )
5147 {
5148 union tgsi_exec_channel r[10];
5149
5150 (*pc)++;
5151
5152 switch (inst->Instruction.Opcode) {
5153 case TGSI_OPCODE_ARL:
5154 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5155 break;
5156
5157 case TGSI_OPCODE_MOV:
5158 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5159 break;
5160
5161 case TGSI_OPCODE_LIT:
5162 exec_lit(mach, inst);
5163 break;
5164
5165 case TGSI_OPCODE_RCP:
5166 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5167 break;
5168
5169 case TGSI_OPCODE_RSQ:
5170 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5171 break;
5172
5173 case TGSI_OPCODE_EXP:
5174 exec_exp(mach, inst);
5175 break;
5176
5177 case TGSI_OPCODE_LOG:
5178 exec_log(mach, inst);
5179 break;
5180
5181 case TGSI_OPCODE_MUL:
5182 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5183 break;
5184
5185 case TGSI_OPCODE_ADD:
5186 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5187 break;
5188
5189 case TGSI_OPCODE_DP3:
5190 exec_dp3(mach, inst);
5191 break;
5192
5193 case TGSI_OPCODE_DP4:
5194 exec_dp4(mach, inst);
5195 break;
5196
5197 case TGSI_OPCODE_DST:
5198 exec_dst(mach, inst);
5199 break;
5200
5201 case TGSI_OPCODE_MIN:
5202 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5203 break;
5204
5205 case TGSI_OPCODE_MAX:
5206 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5207 break;
5208
5209 case TGSI_OPCODE_SLT:
5210 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5211 break;
5212
5213 case TGSI_OPCODE_SGE:
5214 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5215 break;
5216
5217 case TGSI_OPCODE_MAD:
5218 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5219 break;
5220
5221 case TGSI_OPCODE_LRP:
5222 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5223 break;
5224
5225 case TGSI_OPCODE_SQRT:
5226 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5227 break;
5228
5229 case TGSI_OPCODE_DP2A:
5230 exec_dp2a(mach, inst);
5231 break;
5232
5233 case TGSI_OPCODE_FRC:
5234 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5235 break;
5236
5237 case TGSI_OPCODE_CLAMP:
5238 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5239 break;
5240
5241 case TGSI_OPCODE_FLR:
5242 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5243 break;
5244
5245 case TGSI_OPCODE_ROUND:
5246 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5247 break;
5248
5249 case TGSI_OPCODE_EX2:
5250 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5251 break;
5252
5253 case TGSI_OPCODE_LG2:
5254 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5255 break;
5256
5257 case TGSI_OPCODE_POW:
5258 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5259 break;
5260
5261 case TGSI_OPCODE_XPD:
5262 exec_xpd(mach, inst);
5263 break;
5264
5265 case TGSI_OPCODE_DPH:
5266 exec_dph(mach, inst);
5267 break;
5268
5269 case TGSI_OPCODE_COS:
5270 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5271 break;
5272
5273 case TGSI_OPCODE_DDX:
5274 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5275 break;
5276
5277 case TGSI_OPCODE_DDY:
5278 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5279 break;
5280
5281 case TGSI_OPCODE_KILL:
5282 exec_kill (mach, inst);
5283 break;
5284
5285 case TGSI_OPCODE_KILL_IF:
5286 exec_kill_if (mach, inst);
5287 break;
5288
5289 case TGSI_OPCODE_PK2H:
5290 exec_pk2h(mach, inst);
5291 break;
5292
5293 case TGSI_OPCODE_PK2US:
5294 assert (0);
5295 break;
5296
5297 case TGSI_OPCODE_PK4B:
5298 assert (0);
5299 break;
5300
5301 case TGSI_OPCODE_PK4UB:
5302 assert (0);
5303 break;
5304
5305 case TGSI_OPCODE_SEQ:
5306 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5307 break;
5308
5309 case TGSI_OPCODE_SGT:
5310 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5311 break;
5312
5313 case TGSI_OPCODE_SIN:
5314 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5315 break;
5316
5317 case TGSI_OPCODE_SLE:
5318 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5319 break;
5320
5321 case TGSI_OPCODE_SNE:
5322 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5323 break;
5324
5325 case TGSI_OPCODE_TEX:
5326 /* simple texture lookup */
5327 /* src[0] = texcoord */
5328 /* src[1] = sampler unit */
5329 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5330 break;
5331
5332 case TGSI_OPCODE_TXB:
5333 /* Texture lookup with lod bias */
5334 /* src[0] = texcoord (src[0].w = LOD bias) */
5335 /* src[1] = sampler unit */
5336 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5337 break;
5338
5339 case TGSI_OPCODE_TXD:
      /* Texture lookup with explicit partial derivatives */
5341 /* src[0] = texcoord */
5342 /* src[1] = d[strq]/dx */
5343 /* src[2] = d[strq]/dy */
5344 /* src[3] = sampler unit */
5345 exec_txd(mach, inst);
5346 break;
5347
5348 case TGSI_OPCODE_TXL:
      /* Texture lookup with explicit LOD */
5350 /* src[0] = texcoord (src[0].w = LOD) */
5351 /* src[1] = sampler unit */
5352 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5353 break;
5354
5355 case TGSI_OPCODE_TXP:
5356 /* Texture lookup with projection */
5357 /* src[0] = texcoord (src[0].w = projection) */
5358 /* src[1] = sampler unit */
5359 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5360 break;
5361
5362 case TGSI_OPCODE_TG4:
5363 /* src[0] = texcoord */
5364 /* src[1] = component */
5365 /* src[2] = sampler unit */
5366 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5367 break;
5368
5369 case TGSI_OPCODE_LODQ:
5370 /* src[0] = texcoord */
5371 /* src[1] = sampler unit */
5372 exec_lodq(mach, inst);
5373 break;
5374
5375 case TGSI_OPCODE_UP2H:
5376 exec_up2h(mach, inst);
5377 break;
5378
5379 case TGSI_OPCODE_UP2US:
5380 assert (0);
5381 break;
5382
5383 case TGSI_OPCODE_UP4B:
5384 assert (0);
5385 break;
5386
5387 case TGSI_OPCODE_UP4UB:
5388 assert (0);
5389 break;
5390
5391 case TGSI_OPCODE_ARR:
5392 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5393 break;
5394
5395 case TGSI_OPCODE_CAL:
5396 /* skip the call if no execution channels are enabled */
5397 if (mach->ExecMask) {
5398 /* do the call */
5399
5400 /* First, record the depths of the execution stacks.
5401 * This is important for deeply nested/looped return statements.
5402 * We have to unwind the stacks by the correct amount. For a
5403 * real code generator, we could determine the number of entries
5404 * to pop off each stack with simple static analysis and avoid
5405 * implementing this data structure at run time.
5406 */
5407 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5408 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5409 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5410 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5411 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5412 /* note that PC was already incremented above */
5413 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5414
5415 mach->CallStackTop++;
5416
         /* Second, push the Cond, Loop, Cont, Switch, Break, Func stacks */
5418 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5419 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5420 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5421 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5422 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5423 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5424
5425 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5426 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5427 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5428 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5429 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5430 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5431
5432 /* Finally, jump to the subroutine. The label is a pointer
5433 * (an instruction number) to the BGNSUB instruction.
5434 */
5435 *pc = inst->Label.Label;
5436 assert(mach->Instructions[*pc].Instruction.Opcode
5437 == TGSI_OPCODE_BGNSUB);
5438 }
5439 break;
5440
5441 case TGSI_OPCODE_RET:
5442 mach->FuncMask &= ~mach->ExecMask;
5443 UPDATE_EXEC_MASK(mach);
5444
5445 if (mach->FuncMask == 0x0) {
         /* really return now (otherwise, keep executing) */
5447
5448 if (mach->CallStackTop == 0) {
5449 /* returning from main() */
5450 mach->CondStackTop = 0;
5451 mach->LoopStackTop = 0;
5452 mach->ContStackTop = 0;
5453 mach->LoopLabelStackTop = 0;
5454 mach->SwitchStackTop = 0;
5455 mach->BreakStackTop = 0;
5456 *pc = -1;
5457 return FALSE;
5458 }
5459
5460 assert(mach->CallStackTop > 0);
5461 mach->CallStackTop--;
5462
5463 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5464 mach->CondMask = mach->CondStack[mach->CondStackTop];
5465
5466 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5467 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5468
5469 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5470 mach->ContMask = mach->ContStack[mach->ContStackTop];
5471
5472 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5473 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5474
5475 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5476 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5477
5478 assert(mach->FuncStackTop > 0);
5479 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5480
5481 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5482
5483 UPDATE_EXEC_MASK(mach);
5484 }
5485 break;
5486
5487 case TGSI_OPCODE_SSG:
5488 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5489 break;
5490
5491 case TGSI_OPCODE_CMP:
5492 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5493 break;
5494
5495 case TGSI_OPCODE_SCS:
5496 exec_scs(mach, inst);
5497 break;
5498
5499 case TGSI_OPCODE_DIV:
5500 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5501 break;
5502
5503 case TGSI_OPCODE_DP2:
5504 exec_dp2(mach, inst);
5505 break;
5506
5507 case TGSI_OPCODE_IF:
5508 /* push CondMask */
5509 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5510 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5511 FETCH( &r[0], 0, TGSI_CHAN_X );
5512 /* update CondMask */
5513 if( ! r[0].f[0] ) {
5514 mach->CondMask &= ~0x1;
5515 }
5516 if( ! r[0].f[1] ) {
5517 mach->CondMask &= ~0x2;
5518 }
5519 if( ! r[0].f[2] ) {
5520 mach->CondMask &= ~0x4;
5521 }
5522 if( ! r[0].f[3] ) {
5523 mach->CondMask &= ~0x8;
5524 }
5525 UPDATE_EXEC_MASK(mach);
5526 /* Todo: If CondMask==0, jump to ELSE */
5527 break;
5528
5529 case TGSI_OPCODE_UIF:
5530 /* push CondMask */
5531 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5532 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5533 IFETCH( &r[0], 0, TGSI_CHAN_X );
5534 /* update CondMask */
5535 if( ! r[0].u[0] ) {
5536 mach->CondMask &= ~0x1;
5537 }
5538 if( ! r[0].u[1] ) {
5539 mach->CondMask &= ~0x2;
5540 }
5541 if( ! r[0].u[2] ) {
5542 mach->CondMask &= ~0x4;
5543 }
5544 if( ! r[0].u[3] ) {
5545 mach->CondMask &= ~0x8;
5546 }
5547 UPDATE_EXEC_MASK(mach);
5548 /* Todo: If CondMask==0, jump to ELSE */
5549 break;
5550
5551 case TGSI_OPCODE_ELSE:
5552 /* invert CondMask wrt previous mask */
5553 {
5554 uint prevMask;
5555 assert(mach->CondStackTop > 0);
5556 prevMask = mach->CondStack[mach->CondStackTop - 1];
5557 mach->CondMask = ~mach->CondMask & prevMask;
5558 UPDATE_EXEC_MASK(mach);
5559 /* Todo: If CondMask==0, jump to ENDIF */
5560 }
5561 break;
5562
5563 case TGSI_OPCODE_ENDIF:
5564 /* pop CondMask */
5565 assert(mach->CondStackTop > 0);
5566 mach->CondMask = mach->CondStack[--mach->CondStackTop];
5567 UPDATE_EXEC_MASK(mach);
5568 break;
5569
5570 case TGSI_OPCODE_END:
5571 /* make sure we end primitives which haven't
5572 * been explicitly emitted */
5573 conditional_emit_primitive(mach);
5574 /* halt execution */
5575 *pc = -1;
5576 break;
5577
5578 case TGSI_OPCODE_PUSHA:
5579 assert (0);
5580 break;
5581
5582 case TGSI_OPCODE_POPA:
5583 assert (0);
5584 break;
5585
5586 case TGSI_OPCODE_CEIL:
5587 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5588 break;
5589
5590 case TGSI_OPCODE_I2F:
5591 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT);
5592 break;
5593
5594 case TGSI_OPCODE_NOT:
5595 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5596 break;
5597
5598 case TGSI_OPCODE_TRUNC:
5599 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT);
5600 break;
5601
5602 case TGSI_OPCODE_SHL:
5603 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5604 break;
5605
5606 case TGSI_OPCODE_AND:
5607 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5608 break;
5609
5610 case TGSI_OPCODE_OR:
5611 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5612 break;
5613
5614 case TGSI_OPCODE_MOD:
5615 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5616 break;
5617
5618 case TGSI_OPCODE_XOR:
5619 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5620 break;
5621
5622 case TGSI_OPCODE_SAD:
5623 assert (0);
5624 break;
5625
5626 case TGSI_OPCODE_TXF:
5627 exec_txf(mach, inst);
5628 break;
5629
5630 case TGSI_OPCODE_TXQ:
5631 exec_txq(mach, inst);
5632 break;
5633
5634 case TGSI_OPCODE_EMIT:
5635 emit_vertex(mach);
5636 break;
5637
5638 case TGSI_OPCODE_ENDPRIM:
5639 emit_primitive(mach);
5640 break;
5641
5642 case TGSI_OPCODE_BGNLOOP:
5643 /* push LoopMask and ContMasks */
5644 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5645 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5646 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5647 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5648
5649 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5650 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5651 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5652 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5653 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5654 break;
5655
5656 case TGSI_OPCODE_ENDLOOP:
5657 /* Restore ContMask, but don't pop */
5658 assert(mach->ContStackTop > 0);
5659 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5660 UPDATE_EXEC_MASK(mach);
5661 if (mach->ExecMask) {
5662 /* repeat loop: jump to instruction just past BGNLOOP */
5663 assert(mach->LoopLabelStackTop > 0);
5664 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5665 }
5666 else {
5667 /* exit loop: pop LoopMask */
5668 assert(mach->LoopStackTop > 0);
5669 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5670 /* pop ContMask */
5671 assert(mach->ContStackTop > 0);
5672 mach->ContMask = mach->ContStack[--mach->ContStackTop];
5673 assert(mach->LoopLabelStackTop > 0);
5674 --mach->LoopLabelStackTop;
5675
5676 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5677 }
5678 UPDATE_EXEC_MASK(mach);
5679 break;
5680
5681 case TGSI_OPCODE_BRK:
5682 exec_break(mach);
5683 break;
5684
5685 case TGSI_OPCODE_CONT:
5686 /* turn off cont channels for each enabled exec channel */
5687 mach->ContMask &= ~mach->ExecMask;
5688 /* Todo: if mach->LoopMask == 0, jump to end of loop */
5689 UPDATE_EXEC_MASK(mach);
5690 break;
5691
5692 case TGSI_OPCODE_BGNSUB:
5693 /* no-op */
5694 break;
5695
5696 case TGSI_OPCODE_ENDSUB:
5697 /*
5698 * XXX: This really should be a no-op. We should never reach this opcode.
5699 */
5700
5701 assert(mach->CallStackTop > 0);
5702 mach->CallStackTop--;
5703
5704 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5705 mach->CondMask = mach->CondStack[mach->CondStackTop];
5706
5707 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5708 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5709
5710 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5711 mach->ContMask = mach->ContStack[mach->ContStackTop];
5712
5713 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5714 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5715
5716 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5717 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5718
5719 assert(mach->FuncStackTop > 0);
5720 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5721
5722 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5723
5724 UPDATE_EXEC_MASK(mach);
5725 break;
5726
5727 case TGSI_OPCODE_NOP:
5728 break;
5729
5730 case TGSI_OPCODE_BREAKC:
5731 IFETCH(&r[0], 0, TGSI_CHAN_X);
      /* update LoopMask */
5733 if (r[0].u[0] && (mach->ExecMask & 0x1)) {
5734 mach->LoopMask &= ~0x1;
5735 }
5736 if (r[0].u[1] && (mach->ExecMask & 0x2)) {
5737 mach->LoopMask &= ~0x2;
5738 }
5739 if (r[0].u[2] && (mach->ExecMask & 0x4)) {
5740 mach->LoopMask &= ~0x4;
5741 }
5742 if (r[0].u[3] && (mach->ExecMask & 0x8)) {
5743 mach->LoopMask &= ~0x8;
5744 }
5745 /* Todo: if mach->LoopMask == 0, jump to end of loop */
5746 UPDATE_EXEC_MASK(mach);
5747 break;
5748
5749 case TGSI_OPCODE_F2I:
5750 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT);
5751 break;
5752
5753 case TGSI_OPCODE_FSEQ:
5754 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5755 break;
5756
5757 case TGSI_OPCODE_FSGE:
5758 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5759 break;
5760
5761 case TGSI_OPCODE_FSLT:
5762 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5763 break;
5764
5765 case TGSI_OPCODE_FSNE:
5766 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5767 break;
5768
5769 case TGSI_OPCODE_IDIV:
5770 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5771 break;
5772
5773 case TGSI_OPCODE_IMAX:
5774 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5775 break;
5776
5777 case TGSI_OPCODE_IMIN:
5778 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5779 break;
5780
5781 case TGSI_OPCODE_INEG:
5782 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5783 break;
5784
5785 case TGSI_OPCODE_ISGE:
5786 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5787 break;
5788
5789 case TGSI_OPCODE_ISHR:
5790 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5791 break;
5792
5793 case TGSI_OPCODE_ISLT:
5794 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5795 break;
5796
5797 case TGSI_OPCODE_F2U:
5798 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT);
5799 break;
5800
5801 case TGSI_OPCODE_U2F:
5802 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT);
5803 break;
5804
5805 case TGSI_OPCODE_UADD:
5806 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5807 break;
5808
5809 case TGSI_OPCODE_UDIV:
5810 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5811 break;
5812
5813 case TGSI_OPCODE_UMAD:
5814 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5815 break;
5816
5817 case TGSI_OPCODE_UMAX:
5818 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5819 break;
5820
5821 case TGSI_OPCODE_UMIN:
5822 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5823 break;
5824
5825 case TGSI_OPCODE_UMOD:
5826 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5827 break;
5828
5829 case TGSI_OPCODE_UMUL:
5830 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5831 break;
5832
5833 case TGSI_OPCODE_IMUL_HI:
5834 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5835 break;
5836
5837 case TGSI_OPCODE_UMUL_HI:
5838 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5839 break;
5840
5841 case TGSI_OPCODE_USEQ:
5842 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5843 break;
5844
5845 case TGSI_OPCODE_USGE:
5846 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5847 break;
5848
5849 case TGSI_OPCODE_USHR:
5850 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5851 break;
5852
5853 case TGSI_OPCODE_USLT:
5854 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5855 break;
5856
5857 case TGSI_OPCODE_USNE:
5858 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5859 break;
5860
5861 case TGSI_OPCODE_SWITCH:
5862 exec_switch(mach, inst);
5863 break;
5864
5865 case TGSI_OPCODE_CASE:
5866 exec_case(mach, inst);
5867 break;
5868
5869 case TGSI_OPCODE_DEFAULT:
5870 exec_default(mach);
5871 break;
5872
5873 case TGSI_OPCODE_ENDSWITCH:
5874 exec_endswitch(mach);
5875 break;
5876
5877 case TGSI_OPCODE_SAMPLE_I:
5878 exec_txf(mach, inst);
5879 break;
5880
5881 case TGSI_OPCODE_SAMPLE_I_MS:
5882 exec_txf(mach, inst);
5883 break;
5884
5885 case TGSI_OPCODE_SAMPLE:
5886 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
5887 break;
5888
5889 case TGSI_OPCODE_SAMPLE_B:
5890 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
5891 break;
5892
5893 case TGSI_OPCODE_SAMPLE_C:
5894 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
5895 break;
5896
5897 case TGSI_OPCODE_SAMPLE_C_LZ:
5898 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
5899 break;
5900
5901 case TGSI_OPCODE_SAMPLE_D:
5902 exec_sample_d(mach, inst);
5903 break;
5904
5905 case TGSI_OPCODE_SAMPLE_L:
5906 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
5907 break;
5908
5909 case TGSI_OPCODE_GATHER4:
5910 assert(0);
5911 break;
5912
5913 case TGSI_OPCODE_SVIEWINFO:
5914 exec_txq(mach, inst);
5915 break;
5916
5917 case TGSI_OPCODE_SAMPLE_POS:
5918 assert(0);
5919 break;
5920
5921 case TGSI_OPCODE_SAMPLE_INFO:
5922 assert(0);
5923 break;
5924
5925 case TGSI_OPCODE_UARL:
5926 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5927 break;
5928
5929 case TGSI_OPCODE_UCMP:
5930 exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5931 break;
5932
5933 case TGSI_OPCODE_IABS:
5934 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5935 break;
5936
5937 case TGSI_OPCODE_ISSG:
5938 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5939 break;
5940
5941 case TGSI_OPCODE_TEX2:
5942 /* simple texture lookup */
5943 /* src[0] = texcoord */
5944 /* src[1] = compare */
5945 /* src[2] = sampler unit */
5946 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
5947 break;
5948 case TGSI_OPCODE_TXB2:
      /* texture lookup with LOD bias taken from src[1] */
5950 /* src[0] = texcoord */
5951 /* src[1] = bias */
5952 /* src[2] = sampler unit */
5953 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
5954 break;
5955 case TGSI_OPCODE_TXL2:
      /* texture lookup with explicit LOD taken from src[1] */
5957 /* src[0] = texcoord */
5958 /* src[1] = lod */
5959 /* src[2] = sampler unit */
5960 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
5961 break;
5962
5963 case TGSI_OPCODE_IBFE:
5964 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5965 break;
5966 case TGSI_OPCODE_UBFE:
5967 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5968 break;
5969 case TGSI_OPCODE_BFI:
5970 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5971 break;
5972 case TGSI_OPCODE_BREV:
5973 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5974 break;
5975 case TGSI_OPCODE_POPC:
5976 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT);
5977 break;
5978 case TGSI_OPCODE_LSB:
5979 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5980 break;
5981 case TGSI_OPCODE_IMSB:
5982 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT);
5983 break;
5984 case TGSI_OPCODE_UMSB:
5985 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT);
5986 break;
5987
5988 case TGSI_OPCODE_F2D:
5989 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
5990 break;
5991
5992 case TGSI_OPCODE_D2F:
5993 exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT);
5994 break;
5995
5996 case TGSI_OPCODE_DABS:
5997 exec_double_unary(mach, inst, micro_dabs);
5998 break;
5999
6000 case TGSI_OPCODE_DNEG:
6001 exec_double_unary(mach, inst, micro_dneg);
6002 break;
6003
6004 case TGSI_OPCODE_DADD:
6005 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
6006 break;
6007
6008 case TGSI_OPCODE_DDIV:
6009 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
6010 break;
6011
6012 case TGSI_OPCODE_DMUL:
6013 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
6014 break;
6015
6016 case TGSI_OPCODE_DMAX:
6017 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
6018 break;
6019
6020 case TGSI_OPCODE_DMIN:
6021 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
6022 break;
6023
6024 case TGSI_OPCODE_DSLT:
6025 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
6026 break;
6027
6028 case TGSI_OPCODE_DSGE:
6029 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
6030 break;
6031
6032 case TGSI_OPCODE_DSEQ:
6033 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
6034 break;
6035
6036 case TGSI_OPCODE_DSNE:
6037 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
6038 break;
6039
6040 case TGSI_OPCODE_DRCP:
6041 exec_double_unary(mach, inst, micro_drcp);
6042 break;
6043
6044 case TGSI_OPCODE_DSQRT:
6045 exec_double_unary(mach, inst, micro_dsqrt);
6046 break;
6047
6048 case TGSI_OPCODE_DRSQ:
6049 exec_double_unary(mach, inst, micro_drsq);
6050 break;
6051
6052 case TGSI_OPCODE_DMAD:
6053 exec_double_trinary(mach, inst, micro_dmad);
6054 break;
6055
6056 case TGSI_OPCODE_DFRAC:
6057 exec_double_unary(mach, inst, micro_dfrac);
6058 break;
6059
6060 case TGSI_OPCODE_DLDEXP:
6061 exec_dldexp(mach, inst);
6062 break;
6063
6064 case TGSI_OPCODE_DFRACEXP:
6065 exec_dfracexp(mach, inst);
6066 break;
6067
6068 case TGSI_OPCODE_I2D:
6069 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT);
6070 break;
6071
6072 case TGSI_OPCODE_D2I:
6073 exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT);
6074 break;
6075
6076 case TGSI_OPCODE_U2D:
6077 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT);
6078 break;
6079
6080 case TGSI_OPCODE_D2U:
6081 exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT);
6082 break;
6083
6084 case TGSI_OPCODE_LOAD:
6085 exec_load(mach, inst);
6086 break;
6087
6088 case TGSI_OPCODE_STORE:
6089 exec_store(mach, inst);
6090 break;
6091
6092 case TGSI_OPCODE_ATOMUADD:
6093 case TGSI_OPCODE_ATOMXCHG:
6094 case TGSI_OPCODE_ATOMCAS:
6095 case TGSI_OPCODE_ATOMAND:
6096 case TGSI_OPCODE_ATOMOR:
6097 case TGSI_OPCODE_ATOMXOR:
6098 case TGSI_OPCODE_ATOMUMIN:
6099 case TGSI_OPCODE_ATOMUMAX:
6100 case TGSI_OPCODE_ATOMIMIN:
6101 case TGSI_OPCODE_ATOMIMAX:
6102 exec_atomop(mach, inst);
6103 break;
6104
6105 case TGSI_OPCODE_RESQ:
6106 exec_resq(mach, inst);
6107 break;
6108 case TGSI_OPCODE_BARRIER:
6109 case TGSI_OPCODE_MEMBAR:
6110 return TRUE;
6111 break;
6112
6113 case TGSI_OPCODE_I64ABS:
6114 exec_double_unary(mach, inst, micro_i64abs);
6115 break;
6116
6117 case TGSI_OPCODE_I64SSG:
6118 exec_double_unary(mach, inst, micro_i64sgn);
6119 break;
6120
6121 case TGSI_OPCODE_I64NEG:
6122 exec_double_unary(mach, inst, micro_i64neg);
6123 break;
6124
6125 case TGSI_OPCODE_U64SEQ:
6126 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
6127 break;
6128
6129 case TGSI_OPCODE_U64SNE:
6130 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
6131 break;
6132
6133 case TGSI_OPCODE_I64SLT:
6134 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
6135 break;
6136 case TGSI_OPCODE_U64SLT:
6137 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
6138 break;
6139
6140 case TGSI_OPCODE_I64SGE:
6141 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
6142 break;
6143 case TGSI_OPCODE_U64SGE:
6144 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
6145 break;
6146
6147 case TGSI_OPCODE_I64MIN:
6148 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
6149 break;
6150 case TGSI_OPCODE_U64MIN:
6151 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
6152 break;
6153 case TGSI_OPCODE_I64MAX:
6154 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
6155 break;
6156 case TGSI_OPCODE_U64MAX:
6157 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
6158 break;
6159 case TGSI_OPCODE_U64ADD:
6160 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
6161 break;
6162 case TGSI_OPCODE_U64MUL:
6163 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
6164 break;
6165 case TGSI_OPCODE_U64SHL:
6166 exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
6167 break;
6168 case TGSI_OPCODE_I64SHR:
6169 exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
6170 break;
6171 case TGSI_OPCODE_U64SHR:
6172 exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
6173 break;
6174 case TGSI_OPCODE_U64DIV:
6175 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
6176 break;
6177 case TGSI_OPCODE_I64DIV:
6178 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
6179 break;
6180 case TGSI_OPCODE_U64MOD:
6181 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
6182 break;
6183 case TGSI_OPCODE_I64MOD:
6184 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
6185 break;
6186
6187 case TGSI_OPCODE_F2U64:
6188 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
6189 break;
6190
6191 case TGSI_OPCODE_F2I64:
6192 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
6193 break;
6194
6195 case TGSI_OPCODE_U2I64:
6196 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
6197 break;
6198 case TGSI_OPCODE_I2I64:
6199 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
6200 break;
6201
6202 case TGSI_OPCODE_D2U64:
6203 exec_double_unary(mach, inst, micro_d2u64);
6204 break;
6205
6206 case TGSI_OPCODE_D2I64:
6207 exec_double_unary(mach, inst, micro_d2i64);
6208 break;
6209
6210 case TGSI_OPCODE_U642F:
6211 exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT);
6212 break;
6213 case TGSI_OPCODE_I642F:
6214 exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT);
6215 break;
6216
6217 case TGSI_OPCODE_U642D:
6218 exec_double_unary(mach, inst, micro_u642d);
6219 break;
6220 case TGSI_OPCODE_I642D:
6221 exec_double_unary(mach, inst, micro_i642d);
6222 break;
6223
6224 default:
6225 assert( 0 );
6226 }
6227 return FALSE;
6228 }
6229
6230 static void
tgsi_exec_machine_setup_masks(struct tgsi_exec_machine * mach)6231 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
6232 {
6233 uint default_mask = 0xf;
6234
6235 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0;
6236 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0;
6237
6238 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6239 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0;
6240 mach->Primitives[0] = 0;
6241 /* GS runs on a single primitive for now */
6242 default_mask = 0x1;
6243 }
6244
6245 if (mach->NonHelperMask == 0)
6246 mach->NonHelperMask = default_mask;
6247 mach->CondMask = default_mask;
6248 mach->LoopMask = default_mask;
6249 mach->ContMask = default_mask;
6250 mach->FuncMask = default_mask;
6251 mach->ExecMask = default_mask;
6252
6253 mach->Switch.mask = default_mask;
6254
6255 assert(mach->CondStackTop == 0);
6256 assert(mach->LoopStackTop == 0);
6257 assert(mach->ContStackTop == 0);
6258 assert(mach->SwitchStackTop == 0);
6259 assert(mach->BreakStackTop == 0);
6260 assert(mach->CallStackTop == 0);
6261 }
6262
6263 /**
6264 * Run TGSI interpreter.
6265 * \return bitmask of "alive" quad components
6266 */
6267 uint
tgsi_exec_machine_run(struct tgsi_exec_machine * mach,int start_pc)6268 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
6269 {
6270 uint i;
6271
6272 mach->pc = start_pc;
6273
6274 if (!start_pc) {
6275 tgsi_exec_machine_setup_masks(mach);
6276
6277 /* execute declarations (interpolants) */
6278 for (i = 0; i < mach->NumDeclarations; i++) {
6279 exec_declaration( mach, mach->Declarations+i );
6280 }
6281 }
6282
6283 {
6284 #if DEBUG_EXECUTION
6285 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
6286 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6287 uint inst = 1;
6288
6289 if (!start_pc) {
6290 memset(mach->Temps, 0, sizeof(temps));
6291 if (mach->Outputs)
6292 memset(mach->Outputs, 0, sizeof(outputs));
6293 memset(temps, 0, sizeof(temps));
6294 memset(outputs, 0, sizeof(outputs));
6295 }
6296 #endif
6297
6298 /* execute instructions, until pc is set to -1 */
6299 while (mach->pc != -1) {
6300 boolean barrier_hit;
6301 #if DEBUG_EXECUTION
6302 uint i;
6303
6304 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6305 #endif
6306
6307 assert(mach->pc < (int) mach->NumInstructions);
6308 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6309
6310 /* for compute shaders if we hit a barrier return now for later rescheduling */
6311 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6312 return 0;
6313
6314 #if DEBUG_EXECUTION
6315 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
6316 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6317 uint j;
6318
6319 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6320 debug_printf("TEMP[%2u] = ", i);
6321 for (j = 0; j < 4; j++) {
6322 if (j > 0) {
6323 debug_printf(" ");
6324 }
6325 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6326 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6327 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6328 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6329 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6330 }
6331 }
6332 }
6333 if (mach->Outputs) {
6334 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6335 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6336 uint j;
6337
6338 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6339 debug_printf("OUT[%2u] = ", i);
6340 for (j = 0; j < 4; j++) {
6341 if (j > 0) {
6342 debug_printf(" ");
6343 }
6344 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6345 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6346 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6347 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6348 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6349 }
6350 }
6351 }
6352 }
6353 #endif
6354 }
6355 }
6356
6357 #if 0
6358 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6359 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6360 /*
6361 * Scale back depth component.
6362 */
6363 for (i = 0; i < 4; i++)
6364 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6365 }
6366 #endif
6367
6368 /* Strictly speaking, these assertions aren't really needed but they
6369 * can potentially catch some bugs in the control flow code.
6370 */
6371 assert(mach->CondStackTop == 0);
6372 assert(mach->LoopStackTop == 0);
6373 assert(mach->ContStackTop == 0);
6374 assert(mach->SwitchStackTop == 0);
6375 assert(mach->BreakStackTop == 0);
6376 assert(mach->CallStackTop == 0);
6377
6378 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
6379 }
6380