1 /**************************************************************************
2 *
3 * Copyright 2007-2008 VMware, Inc.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
44 * The ExecMask is computed from three other masks (CondMask, LoopMask and
 * ContMask) which are controlled by the flow control instructions (namely:
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "pipe/p_compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/compiler.h"
62 #include "util/half_float.h"
63 #include "util/u_memory.h"
64 #include "util/u_math.h"
65 #include "util/rounding.h"
66
67
/*
 * Compile-time debug switch, off by default.
 * NOTE(review): its uses are outside this excerpt; presumably enables
 * execution tracing -- confirm.
 */
#define DEBUG_EXECUTION 0
69
70
/*
 * Channel indices of the four elements of a quad, named after their
 * position in the tile.  Used by the derivative helpers (micro_ddx,
 * micro_ddy and their _fine variants) to pick neighbouring elements.
 */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3
75
/**
 * One 64-bit channel for every element of a quad.
 *
 * The same storage can be viewed as doubles (d), as pairs of unsigned
 * ints (u), or as unsigned/signed 64-bit integers (u64/i64).
 */
union tgsi_double_channel {
   double d[TGSI_QUAD_SIZE];
   unsigned u[TGSI_QUAD_SIZE][2];
   uint64_t u64[TGSI_QUAD_SIZE];
   int64_t i64[TGSI_QUAD_SIZE];
} ALIGN16;
82
/**
 * Two 64-bit channels grouped together, named xy and zw.
 */
struct ALIGN16 tgsi_double_vector {
   union tgsi_double_channel xy;
   union tgsi_double_channel zw;
};
87
88 static void
micro_abs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)89 micro_abs(union tgsi_exec_channel *dst,
90 const union tgsi_exec_channel *src)
91 {
92 dst->f[0] = fabsf(src->f[0]);
93 dst->f[1] = fabsf(src->f[1]);
94 dst->f[2] = fabsf(src->f[2]);
95 dst->f[3] = fabsf(src->f[3]);
96 }
97
98 static void
micro_arl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)99 micro_arl(union tgsi_exec_channel *dst,
100 const union tgsi_exec_channel *src)
101 {
102 dst->i[0] = (int)floorf(src->f[0]);
103 dst->i[1] = (int)floorf(src->f[1]);
104 dst->i[2] = (int)floorf(src->f[2]);
105 dst->i[3] = (int)floorf(src->f[3]);
106 }
107
108 static void
micro_arr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)109 micro_arr(union tgsi_exec_channel *dst,
110 const union tgsi_exec_channel *src)
111 {
112 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
113 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
114 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
115 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
116 }
117
118 static void
micro_ceil(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)119 micro_ceil(union tgsi_exec_channel *dst,
120 const union tgsi_exec_channel *src)
121 {
122 dst->f[0] = ceilf(src->f[0]);
123 dst->f[1] = ceilf(src->f[1]);
124 dst->f[2] = ceilf(src->f[2]);
125 dst->f[3] = ceilf(src->f[3]);
126 }
127
128 static void
micro_cmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)129 micro_cmp(union tgsi_exec_channel *dst,
130 const union tgsi_exec_channel *src0,
131 const union tgsi_exec_channel *src1,
132 const union tgsi_exec_channel *src2)
133 {
134 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
135 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
136 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
137 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
138 }
139
140 static void
micro_cos(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)141 micro_cos(union tgsi_exec_channel *dst,
142 const union tgsi_exec_channel *src)
143 {
144 dst->f[0] = cosf(src->f[0]);
145 dst->f[1] = cosf(src->f[1]);
146 dst->f[2] = cosf(src->f[2]);
147 dst->f[3] = cosf(src->f[3]);
148 }
149
150 static void
micro_d2f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)151 micro_d2f(union tgsi_exec_channel *dst,
152 const union tgsi_double_channel *src)
153 {
154 dst->f[0] = (float)src->d[0];
155 dst->f[1] = (float)src->d[1];
156 dst->f[2] = (float)src->d[2];
157 dst->f[3] = (float)src->d[3];
158 }
159
160 static void
micro_d2i(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)161 micro_d2i(union tgsi_exec_channel *dst,
162 const union tgsi_double_channel *src)
163 {
164 dst->i[0] = (int)src->d[0];
165 dst->i[1] = (int)src->d[1];
166 dst->i[2] = (int)src->d[2];
167 dst->i[3] = (int)src->d[3];
168 }
169
170 static void
micro_d2u(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)171 micro_d2u(union tgsi_exec_channel *dst,
172 const union tgsi_double_channel *src)
173 {
174 dst->u[0] = (unsigned)src->d[0];
175 dst->u[1] = (unsigned)src->d[1];
176 dst->u[2] = (unsigned)src->d[2];
177 dst->u[3] = (unsigned)src->d[3];
178 }
179 static void
micro_dabs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)180 micro_dabs(union tgsi_double_channel *dst,
181 const union tgsi_double_channel *src)
182 {
183 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
184 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
185 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
186 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
187 }
188
189 static void
micro_dadd(union tgsi_double_channel * dst,const union tgsi_double_channel * src)190 micro_dadd(union tgsi_double_channel *dst,
191 const union tgsi_double_channel *src)
192 {
193 dst->d[0] = src[0].d[0] + src[1].d[0];
194 dst->d[1] = src[0].d[1] + src[1].d[1];
195 dst->d[2] = src[0].d[2] + src[1].d[2];
196 dst->d[3] = src[0].d[3] + src[1].d[3];
197 }
198
199 static void
micro_ddiv(union tgsi_double_channel * dst,const union tgsi_double_channel * src)200 micro_ddiv(union tgsi_double_channel *dst,
201 const union tgsi_double_channel *src)
202 {
203 dst->d[0] = src[0].d[0] / src[1].d[0];
204 dst->d[1] = src[0].d[1] / src[1].d[1];
205 dst->d[2] = src[0].d[2] / src[1].d[2];
206 dst->d[3] = src[0].d[3] / src[1].d[3];
207 }
208
209 static void
micro_ddx(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)210 micro_ddx(union tgsi_exec_channel *dst,
211 const union tgsi_exec_channel *src)
212 {
213 dst->f[0] =
214 dst->f[1] =
215 dst->f[2] =
216 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
217 }
218
219 static void
micro_ddx_fine(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)220 micro_ddx_fine(union tgsi_exec_channel *dst,
221 const union tgsi_exec_channel *src)
222 {
223 dst->f[0] =
224 dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
225 dst->f[2] =
226 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
227 }
228
229
230 static void
micro_ddy(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)231 micro_ddy(union tgsi_exec_channel *dst,
232 const union tgsi_exec_channel *src)
233 {
234 dst->f[0] =
235 dst->f[1] =
236 dst->f[2] =
237 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
238 }
239
240 static void
micro_ddy_fine(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)241 micro_ddy_fine(union tgsi_exec_channel *dst,
242 const union tgsi_exec_channel *src)
243 {
244 dst->f[0] =
245 dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
246 dst->f[1] =
247 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
248 }
249
250 static void
micro_dmul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)251 micro_dmul(union tgsi_double_channel *dst,
252 const union tgsi_double_channel *src)
253 {
254 dst->d[0] = src[0].d[0] * src[1].d[0];
255 dst->d[1] = src[0].d[1] * src[1].d[1];
256 dst->d[2] = src[0].d[2] * src[1].d[2];
257 dst->d[3] = src[0].d[3] * src[1].d[3];
258 }
259
260 static void
micro_dmax(union tgsi_double_channel * dst,const union tgsi_double_channel * src)261 micro_dmax(union tgsi_double_channel *dst,
262 const union tgsi_double_channel *src)
263 {
264 dst->d[0] = fmax(src[0].d[0], src[1].d[0]);
265 dst->d[1] = fmax(src[0].d[1], src[1].d[1]);
266 dst->d[2] = fmax(src[0].d[2], src[1].d[2]);
267 dst->d[3] = fmax(src[0].d[3], src[1].d[3]);
268 }
269
270 static void
micro_dmin(union tgsi_double_channel * dst,const union tgsi_double_channel * src)271 micro_dmin(union tgsi_double_channel *dst,
272 const union tgsi_double_channel *src)
273 {
274 dst->d[0] = fmin(src[0].d[0], src[1].d[0]);
275 dst->d[1] = fmin(src[0].d[1], src[1].d[1]);
276 dst->d[2] = fmin(src[0].d[2], src[1].d[2]);
277 dst->d[3] = fmin(src[0].d[3], src[1].d[3]);
278 }
279
280 static void
micro_dneg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)281 micro_dneg(union tgsi_double_channel *dst,
282 const union tgsi_double_channel *src)
283 {
284 dst->d[0] = -src->d[0];
285 dst->d[1] = -src->d[1];
286 dst->d[2] = -src->d[2];
287 dst->d[3] = -src->d[3];
288 }
289
290 static void
micro_dslt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)291 micro_dslt(union tgsi_double_channel *dst,
292 const union tgsi_double_channel *src)
293 {
294 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
295 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
296 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
297 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
298 }
299
300 static void
micro_dsne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)301 micro_dsne(union tgsi_double_channel *dst,
302 const union tgsi_double_channel *src)
303 {
304 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
305 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
306 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
307 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
308 }
309
310 static void
micro_dsge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)311 micro_dsge(union tgsi_double_channel *dst,
312 const union tgsi_double_channel *src)
313 {
314 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
315 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
316 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
317 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
318 }
319
320 static void
micro_dseq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)321 micro_dseq(union tgsi_double_channel *dst,
322 const union tgsi_double_channel *src)
323 {
324 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
325 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
326 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
327 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
328 }
329
330 static void
micro_drcp(union tgsi_double_channel * dst,const union tgsi_double_channel * src)331 micro_drcp(union tgsi_double_channel *dst,
332 const union tgsi_double_channel *src)
333 {
334 dst->d[0] = 1.0 / src->d[0];
335 dst->d[1] = 1.0 / src->d[1];
336 dst->d[2] = 1.0 / src->d[2];
337 dst->d[3] = 1.0 / src->d[3];
338 }
339
340 static void
micro_dsqrt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)341 micro_dsqrt(union tgsi_double_channel *dst,
342 const union tgsi_double_channel *src)
343 {
344 dst->d[0] = sqrt(src->d[0]);
345 dst->d[1] = sqrt(src->d[1]);
346 dst->d[2] = sqrt(src->d[2]);
347 dst->d[3] = sqrt(src->d[3]);
348 }
349
350 static void
micro_drsq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)351 micro_drsq(union tgsi_double_channel *dst,
352 const union tgsi_double_channel *src)
353 {
354 dst->d[0] = 1.0 / sqrt(src->d[0]);
355 dst->d[1] = 1.0 / sqrt(src->d[1]);
356 dst->d[2] = 1.0 / sqrt(src->d[2]);
357 dst->d[3] = 1.0 / sqrt(src->d[3]);
358 }
359
360 static void
micro_dmad(union tgsi_double_channel * dst,const union tgsi_double_channel * src)361 micro_dmad(union tgsi_double_channel *dst,
362 const union tgsi_double_channel *src)
363 {
364 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
365 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
366 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
367 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
368 }
369
370 static void
micro_dfrac(union tgsi_double_channel * dst,const union tgsi_double_channel * src)371 micro_dfrac(union tgsi_double_channel *dst,
372 const union tgsi_double_channel *src)
373 {
374 dst->d[0] = src->d[0] - floor(src->d[0]);
375 dst->d[1] = src->d[1] - floor(src->d[1]);
376 dst->d[2] = src->d[2] - floor(src->d[2]);
377 dst->d[3] = src->d[3] - floor(src->d[3]);
378 }
379
380 static void
micro_dflr(union tgsi_double_channel * dst,const union tgsi_double_channel * src)381 micro_dflr(union tgsi_double_channel *dst,
382 const union tgsi_double_channel *src)
383 {
384 dst->d[0] = floor(src->d[0]);
385 dst->d[1] = floor(src->d[1]);
386 dst->d[2] = floor(src->d[2]);
387 dst->d[3] = floor(src->d[3]);
388 }
389
390 static void
micro_dldexp(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)391 micro_dldexp(union tgsi_double_channel *dst,
392 const union tgsi_double_channel *src0,
393 union tgsi_exec_channel *src1)
394 {
395 dst->d[0] = ldexp(src0->d[0], src1->i[0]);
396 dst->d[1] = ldexp(src0->d[1], src1->i[1]);
397 dst->d[2] = ldexp(src0->d[2], src1->i[2]);
398 dst->d[3] = ldexp(src0->d[3], src1->i[3]);
399 }
400
401 static void
micro_dfracexp(union tgsi_double_channel * dst,union tgsi_exec_channel * dst_exp,const union tgsi_double_channel * src)402 micro_dfracexp(union tgsi_double_channel *dst,
403 union tgsi_exec_channel *dst_exp,
404 const union tgsi_double_channel *src)
405 {
406 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]);
407 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]);
408 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]);
409 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]);
410 }
411
412 static void
micro_exp2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)413 micro_exp2(union tgsi_exec_channel *dst,
414 const union tgsi_exec_channel *src)
415 {
416 #if DEBUG
417 /* Inf is okay for this instruction, so clamp it to silence assertions. */
418 uint i;
419 union tgsi_exec_channel clamped;
420
421 for (i = 0; i < 4; i++) {
422 if (src->f[i] > 127.99999f) {
423 clamped.f[i] = 127.99999f;
424 } else if (src->f[i] < -126.99999f) {
425 clamped.f[i] = -126.99999f;
426 } else {
427 clamped.f[i] = src->f[i];
428 }
429 }
430 src = &clamped;
431 #endif /* DEBUG */
432
433 dst->f[0] = powf(2.0f, src->f[0]);
434 dst->f[1] = powf(2.0f, src->f[1]);
435 dst->f[2] = powf(2.0f, src->f[2]);
436 dst->f[3] = powf(2.0f, src->f[3]);
437 }
438
439 static void
micro_f2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)440 micro_f2d(union tgsi_double_channel *dst,
441 const union tgsi_exec_channel *src)
442 {
443 dst->d[0] = (double)src->f[0];
444 dst->d[1] = (double)src->f[1];
445 dst->d[2] = (double)src->f[2];
446 dst->d[3] = (double)src->f[3];
447 }
448
449 static void
micro_flr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)450 micro_flr(union tgsi_exec_channel *dst,
451 const union tgsi_exec_channel *src)
452 {
453 dst->f[0] = floorf(src->f[0]);
454 dst->f[1] = floorf(src->f[1]);
455 dst->f[2] = floorf(src->f[2]);
456 dst->f[3] = floorf(src->f[3]);
457 }
458
459 static void
micro_frc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)460 micro_frc(union tgsi_exec_channel *dst,
461 const union tgsi_exec_channel *src)
462 {
463 dst->f[0] = src->f[0] - floorf(src->f[0]);
464 dst->f[1] = src->f[1] - floorf(src->f[1]);
465 dst->f[2] = src->f[2] - floorf(src->f[2]);
466 dst->f[3] = src->f[3] - floorf(src->f[3]);
467 }
468
469 static void
micro_i2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)470 micro_i2d(union tgsi_double_channel *dst,
471 const union tgsi_exec_channel *src)
472 {
473 dst->d[0] = (double)src->i[0];
474 dst->d[1] = (double)src->i[1];
475 dst->d[2] = (double)src->i[2];
476 dst->d[3] = (double)src->i[3];
477 }
478
479 static void
micro_iabs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)480 micro_iabs(union tgsi_exec_channel *dst,
481 const union tgsi_exec_channel *src)
482 {
483 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
484 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
485 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
486 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
487 }
488
489 static void
micro_ineg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)490 micro_ineg(union tgsi_exec_channel *dst,
491 const union tgsi_exec_channel *src)
492 {
493 dst->i[0] = -src->i[0];
494 dst->i[1] = -src->i[1];
495 dst->i[2] = -src->i[2];
496 dst->i[3] = -src->i[3];
497 }
498
499 static void
micro_lg2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)500 micro_lg2(union tgsi_exec_channel *dst,
501 const union tgsi_exec_channel *src)
502 {
503 dst->f[0] = logf(src->f[0]) * 1.442695f;
504 dst->f[1] = logf(src->f[1]) * 1.442695f;
505 dst->f[2] = logf(src->f[2]) * 1.442695f;
506 dst->f[3] = logf(src->f[3]) * 1.442695f;
507 }
508
509 static void
micro_lrp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)510 micro_lrp(union tgsi_exec_channel *dst,
511 const union tgsi_exec_channel *src0,
512 const union tgsi_exec_channel *src1,
513 const union tgsi_exec_channel *src2)
514 {
515 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
516 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
517 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
518 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
519 }
520
521 static void
micro_mad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)522 micro_mad(union tgsi_exec_channel *dst,
523 const union tgsi_exec_channel *src0,
524 const union tgsi_exec_channel *src1,
525 const union tgsi_exec_channel *src2)
526 {
527 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
528 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
529 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
530 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
531 }
532
533 static void
micro_mov(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)534 micro_mov(union tgsi_exec_channel *dst,
535 const union tgsi_exec_channel *src)
536 {
537 dst->u[0] = src->u[0];
538 dst->u[1] = src->u[1];
539 dst->u[2] = src->u[2];
540 dst->u[3] = src->u[3];
541 }
542
543 static void
micro_rcp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)544 micro_rcp(union tgsi_exec_channel *dst,
545 const union tgsi_exec_channel *src)
546 {
547 #if 0 /* for debugging */
548 assert(src->f[0] != 0.0f);
549 assert(src->f[1] != 0.0f);
550 assert(src->f[2] != 0.0f);
551 assert(src->f[3] != 0.0f);
552 #endif
553 dst->f[0] = 1.0f / src->f[0];
554 dst->f[1] = 1.0f / src->f[1];
555 dst->f[2] = 1.0f / src->f[2];
556 dst->f[3] = 1.0f / src->f[3];
557 }
558
559 static void
micro_rnd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)560 micro_rnd(union tgsi_exec_channel *dst,
561 const union tgsi_exec_channel *src)
562 {
563 dst->f[0] = _mesa_roundevenf(src->f[0]);
564 dst->f[1] = _mesa_roundevenf(src->f[1]);
565 dst->f[2] = _mesa_roundevenf(src->f[2]);
566 dst->f[3] = _mesa_roundevenf(src->f[3]);
567 }
568
569 static void
micro_rsq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)570 micro_rsq(union tgsi_exec_channel *dst,
571 const union tgsi_exec_channel *src)
572 {
573 #if 0 /* for debugging */
574 assert(src->f[0] != 0.0f);
575 assert(src->f[1] != 0.0f);
576 assert(src->f[2] != 0.0f);
577 assert(src->f[3] != 0.0f);
578 #endif
579 dst->f[0] = 1.0f / sqrtf(src->f[0]);
580 dst->f[1] = 1.0f / sqrtf(src->f[1]);
581 dst->f[2] = 1.0f / sqrtf(src->f[2]);
582 dst->f[3] = 1.0f / sqrtf(src->f[3]);
583 }
584
585 static void
micro_sqrt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)586 micro_sqrt(union tgsi_exec_channel *dst,
587 const union tgsi_exec_channel *src)
588 {
589 dst->f[0] = sqrtf(src->f[0]);
590 dst->f[1] = sqrtf(src->f[1]);
591 dst->f[2] = sqrtf(src->f[2]);
592 dst->f[3] = sqrtf(src->f[3]);
593 }
594
595 static void
micro_seq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)596 micro_seq(union tgsi_exec_channel *dst,
597 const union tgsi_exec_channel *src0,
598 const union tgsi_exec_channel *src1)
599 {
600 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
601 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
602 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
603 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
604 }
605
606 static void
micro_sge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)607 micro_sge(union tgsi_exec_channel *dst,
608 const union tgsi_exec_channel *src0,
609 const union tgsi_exec_channel *src1)
610 {
611 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
612 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
613 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
614 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
615 }
616
617 static void
micro_sgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)618 micro_sgn(union tgsi_exec_channel *dst,
619 const union tgsi_exec_channel *src)
620 {
621 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
622 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
623 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
624 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
625 }
626
627 static void
micro_isgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)628 micro_isgn(union tgsi_exec_channel *dst,
629 const union tgsi_exec_channel *src)
630 {
631 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
632 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
633 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
634 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
635 }
636
637 static void
micro_sgt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)638 micro_sgt(union tgsi_exec_channel *dst,
639 const union tgsi_exec_channel *src0,
640 const union tgsi_exec_channel *src1)
641 {
642 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
643 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
644 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
645 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
646 }
647
648 static void
micro_sin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)649 micro_sin(union tgsi_exec_channel *dst,
650 const union tgsi_exec_channel *src)
651 {
652 dst->f[0] = sinf(src->f[0]);
653 dst->f[1] = sinf(src->f[1]);
654 dst->f[2] = sinf(src->f[2]);
655 dst->f[3] = sinf(src->f[3]);
656 }
657
658 static void
micro_sle(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)659 micro_sle(union tgsi_exec_channel *dst,
660 const union tgsi_exec_channel *src0,
661 const union tgsi_exec_channel *src1)
662 {
663 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
664 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
665 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
666 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
667 }
668
669 static void
micro_slt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)670 micro_slt(union tgsi_exec_channel *dst,
671 const union tgsi_exec_channel *src0,
672 const union tgsi_exec_channel *src1)
673 {
674 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
675 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
676 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
677 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
678 }
679
680 static void
micro_sne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)681 micro_sne(union tgsi_exec_channel *dst,
682 const union tgsi_exec_channel *src0,
683 const union tgsi_exec_channel *src1)
684 {
685 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
686 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
687 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
688 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
689 }
690
691 static void
micro_trunc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)692 micro_trunc(union tgsi_exec_channel *dst,
693 const union tgsi_exec_channel *src)
694 {
695 dst->f[0] = truncf(src->f[0]);
696 dst->f[1] = truncf(src->f[1]);
697 dst->f[2] = truncf(src->f[2]);
698 dst->f[3] = truncf(src->f[3]);
699 }
700
701 static void
micro_u2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)702 micro_u2d(union tgsi_double_channel *dst,
703 const union tgsi_exec_channel *src)
704 {
705 dst->d[0] = (double)src->u[0];
706 dst->d[1] = (double)src->u[1];
707 dst->d[2] = (double)src->u[2];
708 dst->d[3] = (double)src->u[3];
709 }
710
711 static void
micro_i64abs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)712 micro_i64abs(union tgsi_double_channel *dst,
713 const union tgsi_double_channel *src)
714 {
715 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
716 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
717 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
718 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
719 }
720
721 static void
micro_i64sgn(union tgsi_double_channel * dst,const union tgsi_double_channel * src)722 micro_i64sgn(union tgsi_double_channel *dst,
723 const union tgsi_double_channel *src)
724 {
725 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
726 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
727 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
728 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
729 }
730
731 static void
micro_i64neg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)732 micro_i64neg(union tgsi_double_channel *dst,
733 const union tgsi_double_channel *src)
734 {
735 dst->i64[0] = -src->i64[0];
736 dst->i64[1] = -src->i64[1];
737 dst->i64[2] = -src->i64[2];
738 dst->i64[3] = -src->i64[3];
739 }
740
741 static void
micro_u64seq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)742 micro_u64seq(union tgsi_double_channel *dst,
743 const union tgsi_double_channel *src)
744 {
745 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
746 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
747 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
748 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
749 }
750
751 static void
micro_u64sne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)752 micro_u64sne(union tgsi_double_channel *dst,
753 const union tgsi_double_channel *src)
754 {
755 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
756 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
757 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
758 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
759 }
760
761 static void
micro_i64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)762 micro_i64slt(union tgsi_double_channel *dst,
763 const union tgsi_double_channel *src)
764 {
765 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
766 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
767 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
768 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
769 }
770
771 static void
micro_u64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)772 micro_u64slt(union tgsi_double_channel *dst,
773 const union tgsi_double_channel *src)
774 {
775 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
776 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
777 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
778 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
779 }
780
781 static void
micro_i64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)782 micro_i64sge(union tgsi_double_channel *dst,
783 const union tgsi_double_channel *src)
784 {
785 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
786 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
787 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
788 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
789 }
790
791 static void
micro_u64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)792 micro_u64sge(union tgsi_double_channel *dst,
793 const union tgsi_double_channel *src)
794 {
795 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
796 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
797 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
798 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
799 }
800
801 static void
micro_u64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)802 micro_u64max(union tgsi_double_channel *dst,
803 const union tgsi_double_channel *src)
804 {
805 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
806 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
807 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
808 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
809 }
810
811 static void
micro_i64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)812 micro_i64max(union tgsi_double_channel *dst,
813 const union tgsi_double_channel *src)
814 {
815 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
816 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
817 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
818 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
819 }
820
821 static void
micro_u64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)822 micro_u64min(union tgsi_double_channel *dst,
823 const union tgsi_double_channel *src)
824 {
825 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
826 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
827 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
828 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
829 }
830
831 static void
micro_i64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)832 micro_i64min(union tgsi_double_channel *dst,
833 const union tgsi_double_channel *src)
834 {
835 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
836 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
837 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
838 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
839 }
840
841 static void
micro_u64add(union tgsi_double_channel * dst,const union tgsi_double_channel * src)842 micro_u64add(union tgsi_double_channel *dst,
843 const union tgsi_double_channel *src)
844 {
845 dst->u64[0] = src[0].u64[0] + src[1].u64[0];
846 dst->u64[1] = src[0].u64[1] + src[1].u64[1];
847 dst->u64[2] = src[0].u64[2] + src[1].u64[2];
848 dst->u64[3] = src[0].u64[3] + src[1].u64[3];
849 }
850
851 static void
micro_u64mul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)852 micro_u64mul(union tgsi_double_channel *dst,
853 const union tgsi_double_channel *src)
854 {
855 dst->u64[0] = src[0].u64[0] * src[1].u64[0];
856 dst->u64[1] = src[0].u64[1] * src[1].u64[1];
857 dst->u64[2] = src[0].u64[2] * src[1].u64[2];
858 dst->u64[3] = src[0].u64[3] * src[1].u64[3];
859 }
860
861 static void
micro_u64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)862 micro_u64div(union tgsi_double_channel *dst,
863 const union tgsi_double_channel *src)
864 {
865 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
866 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
867 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
868 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
869 }
870
871 static void
micro_i64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)872 micro_i64div(union tgsi_double_channel *dst,
873 const union tgsi_double_channel *src)
874 {
875 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
876 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
877 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
878 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
879 }
880
881 static void
micro_u64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)882 micro_u64mod(union tgsi_double_channel *dst,
883 const union tgsi_double_channel *src)
884 {
885 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
886 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
887 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
888 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
889 }
890
891 static void
micro_i64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)892 micro_i64mod(union tgsi_double_channel *dst,
893 const union tgsi_double_channel *src)
894 {
895 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
896 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
897 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
898 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
899 }
900
901 static void
micro_u64shl(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)902 micro_u64shl(union tgsi_double_channel *dst,
903 const union tgsi_double_channel *src0,
904 union tgsi_exec_channel *src1)
905 {
906 unsigned masked_count;
907 masked_count = src1->u[0] & 0x3f;
908 dst->u64[0] = src0->u64[0] << masked_count;
909 masked_count = src1->u[1] & 0x3f;
910 dst->u64[1] = src0->u64[1] << masked_count;
911 masked_count = src1->u[2] & 0x3f;
912 dst->u64[2] = src0->u64[2] << masked_count;
913 masked_count = src1->u[3] & 0x3f;
914 dst->u64[3] = src0->u64[3] << masked_count;
915 }
916
917 static void
micro_i64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)918 micro_i64shr(union tgsi_double_channel *dst,
919 const union tgsi_double_channel *src0,
920 union tgsi_exec_channel *src1)
921 {
922 unsigned masked_count;
923 masked_count = src1->u[0] & 0x3f;
924 dst->i64[0] = src0->i64[0] >> masked_count;
925 masked_count = src1->u[1] & 0x3f;
926 dst->i64[1] = src0->i64[1] >> masked_count;
927 masked_count = src1->u[2] & 0x3f;
928 dst->i64[2] = src0->i64[2] >> masked_count;
929 masked_count = src1->u[3] & 0x3f;
930 dst->i64[3] = src0->i64[3] >> masked_count;
931 }
932
933 static void
micro_u64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)934 micro_u64shr(union tgsi_double_channel *dst,
935 const union tgsi_double_channel *src0,
936 union tgsi_exec_channel *src1)
937 {
938 unsigned masked_count;
939 masked_count = src1->u[0] & 0x3f;
940 dst->u64[0] = src0->u64[0] >> masked_count;
941 masked_count = src1->u[1] & 0x3f;
942 dst->u64[1] = src0->u64[1] >> masked_count;
943 masked_count = src1->u[2] & 0x3f;
944 dst->u64[2] = src0->u64[2] >> masked_count;
945 masked_count = src1->u[3] & 0x3f;
946 dst->u64[3] = src0->u64[3] >> masked_count;
947 }
948
/**
 * Data type tag passed along when fetching instruction operands.
 *
 * fetch_source() uses it to choose between float negation
 * (TGSI_EXEC_DATA_FLOAT) and integer negation (every other value) when a
 * source register has the Negate modifier set.
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT,
   TGSI_EXEC_DATA_INT,
   TGSI_EXEC_DATA_UINT,
   TGSI_EXEC_DATA_DOUBLE,
   TGSI_EXEC_DATA_INT64,
   TGSI_EXEC_DATA_UINT64,
};
957
/**
 * Recompute the execution mask of the machine pointed to by MACH.
 *
 * ExecMask becomes the bitwise AND of the CondMask, LoopMask, ContMask,
 * Switch.mask and FuncMask fields, so a channel stays enabled only while
 * every one of those masks enables it.
 *
 * MACH is parenthesized so that any pointer expression (e.g. "&machine")
 * may be passed; it is evaluated several times and must therefore be free
 * of side effects.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
961
962
/**
 * Constant channel with all four lanes equal to 0.0.  Within this file it
 * is also passed as the "index2D" argument of fetch_src_file_channel() when
 * an indirection register is read.
 */
static const union tgsi_exec_channel ZeroVec =
   { { 0.0, 0.0, 0.0, 0.0 } };

/** Constant channel with all four lanes equal to 1.0. */
static const union tgsi_exec_channel OneVec = {
   {1.0f, 1.0f, 1.0f, 1.0f}
};

/** Constant channel with all four lanes equal to +128.0. */
static const union tgsi_exec_channel P128Vec = {
   {128.0f, 128.0f, 128.0f, 128.0f}
};

/** Constant channel with all four lanes equal to -128.0. */
static const union tgsi_exec_channel M128Vec = {
   {-128.0f, -128.0f, -128.0f, -128.0f}
};
977
978
/**
 * Assert that none of the float values in 'chan' are infinite or NaN.
 * NaN and Inf may occur normally during program execution and should
 * not lead to crashes, etc.  But when debugging, it's helpful to catch
 * them.
 *
 * \param chan  channel whose four float lanes (f[0]..f[3]) are checked
 */
static inline void
check_inf_or_nan(const union tgsi_exec_channel *chan)
{
   /* One assert per lane, so a failure message names the offending lane. */
   assert(!util_is_inf_or_nan((chan)->f[0]));
   assert(!util_is_inf_or_nan((chan)->f[1]));
   assert(!util_is_inf_or_nan((chan)->f[2]));
   assert(!util_is_inf_or_nan((chan)->f[3]));
}
993
994
#ifdef DEBUG
/**
 * Debug helper: print the four float lanes of 'chan', prefixed by 'msg',
 * through debug_printf().
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
1003
1004
#ifdef DEBUG
/**
 * Debug helper: print temporary register 'index' of 'mach', one line per
 * component (X, Y, Z, W), each line listing the four float lanes of that
 * component.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, uint index)
{
   static const char comp_names[] = "XYZW";
   unsigned comp;

   debug_printf("Temp[%u] =\n", index);

   for (comp = 0; comp < 4; comp++) {
      const union tgsi_exec_channel *chan = &mach->Temps[index].xyzw[comp];

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   comp_names[comp],
                   chan->f[0], chan->f[1], chan->f[2], chan->f[3]);
   }
}
#endif
1022
1023
1024 void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine * mach,unsigned num_bufs,const void ** bufs,const unsigned * buf_sizes)1025 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1026 unsigned num_bufs,
1027 const void **bufs,
1028 const unsigned *buf_sizes)
1029 {
1030 unsigned i;
1031
1032 for (i = 0; i < num_bufs; i++) {
1033 mach->Consts[i] = bufs[i];
1034 mach->ConstsSize[i] = buf_sizes[i];
1035 }
1036 }
1037
1038 /**
1039 * Initialize machine state by expanding tokens to full instructions,
1040 * allocating temporary storage, setting up constants, etc.
1041 * After this, we can call tgsi_exec_machine_run() many times.
1042 */
1043 void
tgsi_exec_machine_bind_shader(struct tgsi_exec_machine * mach,const struct tgsi_token * tokens,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)1044 tgsi_exec_machine_bind_shader(
1045 struct tgsi_exec_machine *mach,
1046 const struct tgsi_token *tokens,
1047 struct tgsi_sampler *sampler,
1048 struct tgsi_image *image,
1049 struct tgsi_buffer *buffer)
1050 {
1051 uint k;
1052 struct tgsi_parse_context parse;
1053 struct tgsi_full_instruction *instructions;
1054 struct tgsi_full_declaration *declarations;
1055 uint maxInstructions = 10, numInstructions = 0;
1056 uint maxDeclarations = 10, numDeclarations = 0;
1057
1058 #if 0
1059 tgsi_dump(tokens, 0);
1060 #endif
1061
1062 mach->Tokens = tokens;
1063 mach->Sampler = sampler;
1064 mach->Image = image;
1065 mach->Buffer = buffer;
1066
1067 if (!tokens) {
1068 /* unbind and free all */
1069 FREE(mach->Declarations);
1070 mach->Declarations = NULL;
1071 mach->NumDeclarations = 0;
1072
1073 FREE(mach->Instructions);
1074 mach->Instructions = NULL;
1075 mach->NumInstructions = 0;
1076
1077 return;
1078 }
1079
1080 k = tgsi_parse_init (&parse, mach->Tokens);
1081 if (k != TGSI_PARSE_OK) {
1082 debug_printf( "Problem parsing!\n" );
1083 return;
1084 }
1085
1086 mach->ImmLimit = 0;
1087 mach->NumOutputs = 0;
1088
1089 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1090 mach->SysSemanticToIndex[k] = -1;
1091
1092 if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1093 !mach->UsedGeometryShader) {
1094 struct tgsi_exec_vector *inputs;
1095 struct tgsi_exec_vector *outputs;
1096
1097 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1098 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1099 16);
1100
1101 if (!inputs)
1102 return;
1103
1104 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1105 TGSI_MAX_TOTAL_VERTICES, 16);
1106
1107 if (!outputs) {
1108 align_free(inputs);
1109 return;
1110 }
1111
1112 align_free(mach->Inputs);
1113 align_free(mach->Outputs);
1114
1115 mach->Inputs = inputs;
1116 mach->Outputs = outputs;
1117 mach->UsedGeometryShader = TRUE;
1118 }
1119
1120 declarations = (struct tgsi_full_declaration *)
1121 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1122
1123 if (!declarations) {
1124 return;
1125 }
1126
1127 instructions = (struct tgsi_full_instruction *)
1128 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1129
1130 if (!instructions) {
1131 FREE( declarations );
1132 return;
1133 }
1134
1135 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1136 uint i;
1137
1138 tgsi_parse_token( &parse );
1139 switch( parse.FullToken.Token.Type ) {
1140 case TGSI_TOKEN_TYPE_DECLARATION:
1141 /* save expanded declaration */
1142 if (numDeclarations == maxDeclarations) {
1143 declarations = REALLOC(declarations,
1144 maxDeclarations
1145 * sizeof(struct tgsi_full_declaration),
1146 (maxDeclarations + 10)
1147 * sizeof(struct tgsi_full_declaration));
1148 maxDeclarations += 10;
1149 }
1150 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
1151 mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1);
1152 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1153 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1154 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1155 }
1156
1157 memcpy(declarations + numDeclarations,
1158 &parse.FullToken.FullDeclaration,
1159 sizeof(declarations[0]));
1160 numDeclarations++;
1161 break;
1162
1163 case TGSI_TOKEN_TYPE_IMMEDIATE:
1164 {
1165 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1166 assert( size <= 4 );
1167 if (mach->ImmLimit >= mach->ImmsReserved) {
1168 unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;
1169 float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));
1170 if (imms) {
1171 mach->ImmsReserved = newReserved;
1172 mach->Imms = imms;
1173 } else {
1174 debug_printf("Unable to (re)allocate space for immidiate constants\n");
1175 break;
1176 }
1177 }
1178
1179 for( i = 0; i < size; i++ ) {
1180 mach->Imms[mach->ImmLimit][i] =
1181 parse.FullToken.FullImmediate.u[i].Float;
1182 }
1183 mach->ImmLimit += 1;
1184 }
1185 break;
1186
1187 case TGSI_TOKEN_TYPE_INSTRUCTION:
1188
1189 /* save expanded instruction */
1190 if (numInstructions == maxInstructions) {
1191 instructions = REALLOC(instructions,
1192 maxInstructions
1193 * sizeof(struct tgsi_full_instruction),
1194 (maxInstructions + 10)
1195 * sizeof(struct tgsi_full_instruction));
1196 maxInstructions += 10;
1197 }
1198
1199 memcpy(instructions + numInstructions,
1200 &parse.FullToken.FullInstruction,
1201 sizeof(instructions[0]));
1202
1203 numInstructions++;
1204 break;
1205
1206 case TGSI_TOKEN_TYPE_PROPERTY:
1207 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1208 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1209 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1210 }
1211 }
1212 break;
1213
1214 default:
1215 assert( 0 );
1216 }
1217 }
1218 tgsi_parse_free (&parse);
1219
1220 FREE(mach->Declarations);
1221 mach->Declarations = declarations;
1222 mach->NumDeclarations = numDeclarations;
1223
1224 FREE(mach->Instructions);
1225 mach->Instructions = instructions;
1226 mach->NumInstructions = numInstructions;
1227 }
1228
1229
1230 struct tgsi_exec_machine *
tgsi_exec_machine_create(enum pipe_shader_type shader_type)1231 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1232 {
1233 struct tgsi_exec_machine *mach;
1234
1235 mach = align_malloc( sizeof *mach, 16 );
1236 if (!mach)
1237 goto fail;
1238
1239 memset(mach, 0, sizeof(*mach));
1240
1241 mach->ShaderType = shader_type;
1242
1243 if (shader_type != PIPE_SHADER_COMPUTE) {
1244 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1245 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1246 if (!mach->Inputs || !mach->Outputs)
1247 goto fail;
1248 }
1249
1250 if (shader_type == PIPE_SHADER_FRAGMENT) {
1251 mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);
1252 if (!mach->InputSampleOffsetApply)
1253 goto fail;
1254 }
1255
1256 #ifdef DEBUG
1257 /* silence warnings */
1258 (void) print_chan;
1259 (void) print_temp;
1260 #endif
1261
1262 return mach;
1263
1264 fail:
1265 if (mach) {
1266 align_free(mach->InputSampleOffsetApply);
1267 align_free(mach->Inputs);
1268 align_free(mach->Outputs);
1269 align_free(mach);
1270 }
1271 return NULL;
1272 }
1273
1274
1275 void
tgsi_exec_machine_destroy(struct tgsi_exec_machine * mach)1276 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1277 {
1278 if (mach) {
1279 FREE(mach->Instructions);
1280 FREE(mach->Declarations);
1281 FREE(mach->Imms);
1282
1283 align_free(mach->InputSampleOffsetApply);
1284 align_free(mach->Inputs);
1285 align_free(mach->Outputs);
1286
1287 align_free(mach);
1288 }
1289 }
1290
1291 static void
micro_add(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1292 micro_add(union tgsi_exec_channel *dst,
1293 const union tgsi_exec_channel *src0,
1294 const union tgsi_exec_channel *src1)
1295 {
1296 dst->f[0] = src0->f[0] + src1->f[0];
1297 dst->f[1] = src0->f[1] + src1->f[1];
1298 dst->f[2] = src0->f[2] + src1->f[2];
1299 dst->f[3] = src0->f[3] + src1->f[3];
1300 }
1301
1302 static void
micro_div(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1303 micro_div(
1304 union tgsi_exec_channel *dst,
1305 const union tgsi_exec_channel *src0,
1306 const union tgsi_exec_channel *src1 )
1307 {
1308 if (src1->f[0] != 0) {
1309 dst->f[0] = src0->f[0] / src1->f[0];
1310 }
1311 if (src1->f[1] != 0) {
1312 dst->f[1] = src0->f[1] / src1->f[1];
1313 }
1314 if (src1->f[2] != 0) {
1315 dst->f[2] = src0->f[2] / src1->f[2];
1316 }
1317 if (src1->f[3] != 0) {
1318 dst->f[3] = src0->f[3] / src1->f[3];
1319 }
1320 }
1321
1322 static void
micro_lt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)1323 micro_lt(
1324 union tgsi_exec_channel *dst,
1325 const union tgsi_exec_channel *src0,
1326 const union tgsi_exec_channel *src1,
1327 const union tgsi_exec_channel *src2,
1328 const union tgsi_exec_channel *src3 )
1329 {
1330 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1331 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1332 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1333 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1334 }
1335
1336 static void
micro_max(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1337 micro_max(union tgsi_exec_channel *dst,
1338 const union tgsi_exec_channel *src0,
1339 const union tgsi_exec_channel *src1)
1340 {
1341 dst->f[0] = fmaxf(src0->f[0], src1->f[0]);
1342 dst->f[1] = fmaxf(src0->f[1], src1->f[1]);
1343 dst->f[2] = fmaxf(src0->f[2], src1->f[2]);
1344 dst->f[3] = fmaxf(src0->f[3], src1->f[3]);
1345 }
1346
1347 static void
micro_min(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1348 micro_min(union tgsi_exec_channel *dst,
1349 const union tgsi_exec_channel *src0,
1350 const union tgsi_exec_channel *src1)
1351 {
1352 dst->f[0] = fminf(src0->f[0], src1->f[0]);
1353 dst->f[1] = fminf(src0->f[1], src1->f[1]);
1354 dst->f[2] = fminf(src0->f[2], src1->f[2]);
1355 dst->f[3] = fminf(src0->f[3], src1->f[3]);
1356 }
1357
1358 static void
micro_mul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1359 micro_mul(union tgsi_exec_channel *dst,
1360 const union tgsi_exec_channel *src0,
1361 const union tgsi_exec_channel *src1)
1362 {
1363 dst->f[0] = src0->f[0] * src1->f[0];
1364 dst->f[1] = src0->f[1] * src1->f[1];
1365 dst->f[2] = src0->f[2] * src1->f[2];
1366 dst->f[3] = src0->f[3] * src1->f[3];
1367 }
1368
1369 static void
micro_neg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)1370 micro_neg(
1371 union tgsi_exec_channel *dst,
1372 const union tgsi_exec_channel *src )
1373 {
1374 dst->f[0] = -src->f[0];
1375 dst->f[1] = -src->f[1];
1376 dst->f[2] = -src->f[2];
1377 dst->f[3] = -src->f[3];
1378 }
1379
1380 static void
micro_pow(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1381 micro_pow(
1382 union tgsi_exec_channel *dst,
1383 const union tgsi_exec_channel *src0,
1384 const union tgsi_exec_channel *src1 )
1385 {
1386 dst->f[0] = powf( src0->f[0], src1->f[0] );
1387 dst->f[1] = powf( src0->f[1], src1->f[1] );
1388 dst->f[2] = powf( src0->f[2], src1->f[2] );
1389 dst->f[3] = powf( src0->f[3], src1->f[3] );
1390 }
1391
1392 static void
micro_ldexp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1393 micro_ldexp(union tgsi_exec_channel *dst,
1394 const union tgsi_exec_channel *src0,
1395 const union tgsi_exec_channel *src1)
1396 {
1397 dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
1398 dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
1399 dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
1400 dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
1401 }
1402
1403 static void
micro_sub(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1404 micro_sub(union tgsi_exec_channel *dst,
1405 const union tgsi_exec_channel *src0,
1406 const union tgsi_exec_channel *src1)
1407 {
1408 dst->f[0] = src0->f[0] - src1->f[0];
1409 dst->f[1] = src0->f[1] - src1->f[1];
1410 dst->f[2] = src0->f[2] - src1->f[2];
1411 dst->f[3] = src0->f[3] - src1->f[3];
1412 }
1413
1414 static void
fetch_src_file_channel(const struct tgsi_exec_machine * mach,const uint file,const uint swizzle,const union tgsi_exec_channel * index,const union tgsi_exec_channel * index2D,union tgsi_exec_channel * chan)1415 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1416 const uint file,
1417 const uint swizzle,
1418 const union tgsi_exec_channel *index,
1419 const union tgsi_exec_channel *index2D,
1420 union tgsi_exec_channel *chan)
1421 {
1422 uint i;
1423
1424 assert(swizzle < 4);
1425
1426 switch (file) {
1427 case TGSI_FILE_CONSTANT:
1428 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1429 /* NOTE: copying the const value as a uint instead of float */
1430 const uint constbuf = index2D->i[i];
1431 const unsigned pos = index->i[i] * 4 + swizzle;
1432 /* const buffer bounds check */
1433 if (pos >= mach->ConstsSize[constbuf] / 4) {
1434 if (0) {
1435 /* Debug: print warning */
1436 static int count = 0;
1437 if (count++ < 100)
1438 debug_printf("TGSI Exec: const buffer index %d"
1439 " out of bounds\n", pos);
1440 }
1441 chan->u[i] = 0;
1442 } else {
1443 const uint *buf = (const uint *)mach->Consts[constbuf];
1444 chan->u[i] = buf[pos];
1445 }
1446 }
1447 break;
1448
1449 case TGSI_FILE_INPUT:
1450 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1451 /*
1452 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1453 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1454 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1455 index2D->i[i], index->i[i]);
1456 }*/
1457 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1458 assert(pos >= 0);
1459 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1460 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1461 }
1462 break;
1463
1464 case TGSI_FILE_SYSTEM_VALUE:
1465 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1466 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1467 }
1468 break;
1469
1470 case TGSI_FILE_TEMPORARY:
1471 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1472 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1473 assert(index2D->i[i] == 0);
1474
1475 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1476 }
1477 break;
1478
1479 case TGSI_FILE_IMMEDIATE:
1480 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1481 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1482 assert(index2D->i[i] == 0);
1483
1484 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1485 }
1486 break;
1487
1488 case TGSI_FILE_ADDRESS:
1489 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1490 assert(index->i[i] >= 0 && index->i[i] < ARRAY_SIZE(mach->Addrs));
1491 assert(index2D->i[i] == 0);
1492
1493 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1494 }
1495 break;
1496
1497 case TGSI_FILE_OUTPUT:
1498 /* vertex/fragment output vars can be read too */
1499 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1500 assert(index->i[i] >= 0);
1501 assert(index2D->i[i] == 0);
1502
1503 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1504 }
1505 break;
1506
1507 default:
1508 assert(0);
1509 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1510 chan->u[i] = 0;
1511 }
1512 }
1513 }
1514
1515 static void
get_index_registers(const struct tgsi_exec_machine * mach,const struct tgsi_full_src_register * reg,union tgsi_exec_channel * index,union tgsi_exec_channel * index2D)1516 get_index_registers(const struct tgsi_exec_machine *mach,
1517 const struct tgsi_full_src_register *reg,
1518 union tgsi_exec_channel *index,
1519 union tgsi_exec_channel *index2D)
1520 {
1521 uint swizzle;
1522
1523 /* We start with a direct index into a register file.
1524 *
1525 * file[1],
1526 * where:
1527 * file = Register.File
1528 * [1] = Register.Index
1529 */
1530 index->i[0] =
1531 index->i[1] =
1532 index->i[2] =
1533 index->i[3] = reg->Register.Index;
1534
1535 /* There is an extra source register that indirectly subscripts
1536 * a register file. The direct index now becomes an offset
1537 * that is being added to the indirect register.
1538 *
1539 * file[ind[2].x+1],
1540 * where:
1541 * ind = Indirect.File
1542 * [2] = Indirect.Index
1543 * .x = Indirect.SwizzleX
1544 */
1545 if (reg->Register.Indirect) {
1546 union tgsi_exec_channel index2;
1547 union tgsi_exec_channel indir_index;
1548 const uint execmask = mach->ExecMask;
1549 uint i;
1550
1551 /* which address register (always zero now) */
1552 index2.i[0] =
1553 index2.i[1] =
1554 index2.i[2] =
1555 index2.i[3] = reg->Indirect.Index;
1556 /* get current value of address register[swizzle] */
1557 swizzle = reg->Indirect.Swizzle;
1558 fetch_src_file_channel(mach,
1559 reg->Indirect.File,
1560 swizzle,
1561 &index2,
1562 &ZeroVec,
1563 &indir_index);
1564
1565 /* add value of address register to the offset */
1566 index->i[0] += indir_index.i[0];
1567 index->i[1] += indir_index.i[1];
1568 index->i[2] += indir_index.i[2];
1569 index->i[3] += indir_index.i[3];
1570
1571 /* for disabled execution channels, zero-out the index to
1572 * avoid using a potential garbage value.
1573 */
1574 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1575 if ((execmask & (1 << i)) == 0)
1576 index->i[i] = 0;
1577 }
1578 }
1579
1580 /* There is an extra source register that is a second
1581 * subscript to a register file. Effectively it means that
1582 * the register file is actually a 2D array of registers.
1583 *
1584 * file[3][1],
1585 * where:
1586 * [3] = Dimension.Index
1587 */
1588 if (reg->Register.Dimension) {
1589 index2D->i[0] =
1590 index2D->i[1] =
1591 index2D->i[2] =
1592 index2D->i[3] = reg->Dimension.Index;
1593
1594 /* Again, the second subscript index can be addressed indirectly
1595 * identically to the first one.
1596 * Nothing stops us from indirectly addressing the indirect register,
1597 * but there is no need for that, so we won't exercise it.
1598 *
1599 * file[ind[4].y+3][1],
1600 * where:
1601 * ind = DimIndirect.File
1602 * [4] = DimIndirect.Index
1603 * .y = DimIndirect.SwizzleX
1604 */
1605 if (reg->Dimension.Indirect) {
1606 union tgsi_exec_channel index2;
1607 union tgsi_exec_channel indir_index;
1608 const uint execmask = mach->ExecMask;
1609 uint i;
1610
1611 index2.i[0] =
1612 index2.i[1] =
1613 index2.i[2] =
1614 index2.i[3] = reg->DimIndirect.Index;
1615
1616 swizzle = reg->DimIndirect.Swizzle;
1617 fetch_src_file_channel(mach,
1618 reg->DimIndirect.File,
1619 swizzle,
1620 &index2,
1621 &ZeroVec,
1622 &indir_index);
1623
1624 index2D->i[0] += indir_index.i[0];
1625 index2D->i[1] += indir_index.i[1];
1626 index2D->i[2] += indir_index.i[2];
1627 index2D->i[3] += indir_index.i[3];
1628
1629 /* for disabled execution channels, zero-out the index to
1630 * avoid using a potential garbage value.
1631 */
1632 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1633 if ((execmask & (1 << i)) == 0) {
1634 index2D->i[i] = 0;
1635 }
1636 }
1637 }
1638
1639 /* If by any chance there was a need for a 3D array of register
1640 * files, we would have to check whether Dimension is followed
1641 * by a dimension register and continue the saga.
1642 */
1643 } else {
1644 index2D->i[0] =
1645 index2D->i[1] =
1646 index2D->i[2] =
1647 index2D->i[3] = 0;
1648 }
1649 }
1650
1651
1652 static void
fetch_source_d(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index)1653 fetch_source_d(const struct tgsi_exec_machine *mach,
1654 union tgsi_exec_channel *chan,
1655 const struct tgsi_full_src_register *reg,
1656 const uint chan_index)
1657 {
1658 union tgsi_exec_channel index;
1659 union tgsi_exec_channel index2D;
1660 uint swizzle;
1661
1662 get_index_registers(mach, reg, &index, &index2D);
1663
1664
1665 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1666 fetch_src_file_channel(mach,
1667 reg->Register.File,
1668 swizzle,
1669 &index,
1670 &index2D,
1671 chan);
1672 }
1673
1674 static void
fetch_source(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const uint chan_index,enum tgsi_exec_datatype src_datatype)1675 fetch_source(const struct tgsi_exec_machine *mach,
1676 union tgsi_exec_channel *chan,
1677 const struct tgsi_full_src_register *reg,
1678 const uint chan_index,
1679 enum tgsi_exec_datatype src_datatype)
1680 {
1681 fetch_source_d(mach, chan, reg, chan_index);
1682
1683 if (reg->Register.Absolute) {
1684 assert(src_datatype == TGSI_EXEC_DATA_FLOAT);
1685 micro_abs(chan, chan);
1686 }
1687
1688 if (reg->Register.Negate) {
1689 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1690 micro_neg(chan, chan);
1691 } else {
1692 micro_ineg(chan, chan);
1693 }
1694 }
1695 }
1696
1697 static union tgsi_exec_channel *
store_dest_dstret(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,uint chan_index)1698 store_dest_dstret(struct tgsi_exec_machine *mach,
1699 const union tgsi_exec_channel *chan,
1700 const struct tgsi_full_dst_register *reg,
1701 uint chan_index)
1702 {
1703 static union tgsi_exec_channel null;
1704 union tgsi_exec_channel *dst;
1705 int offset = 0; /* indirection offset */
1706 int index;
1707
1708
1709 /* There is an extra source register that indirectly subscripts
1710 * a register file. The direct index now becomes an offset
1711 * that is being added to the indirect register.
1712 *
1713 * file[ind[2].x+1],
1714 * where:
1715 * ind = Indirect.File
1716 * [2] = Indirect.Index
1717 * .x = Indirect.SwizzleX
1718 */
1719 if (reg->Register.Indirect) {
1720 union tgsi_exec_channel index;
1721 union tgsi_exec_channel indir_index;
1722 uint swizzle;
1723
1724 /* which address register (always zero for now) */
1725 index.i[0] =
1726 index.i[1] =
1727 index.i[2] =
1728 index.i[3] = reg->Indirect.Index;
1729
1730 /* get current value of address register[swizzle] */
1731 swizzle = reg->Indirect.Swizzle;
1732
1733 /* fetch values from the address/indirection register */
1734 fetch_src_file_channel(mach,
1735 reg->Indirect.File,
1736 swizzle,
1737 &index,
1738 &ZeroVec,
1739 &indir_index);
1740
1741 /* save indirection offset */
1742 offset = indir_index.i[0];
1743 }
1744
1745 switch (reg->Register.File) {
1746 case TGSI_FILE_NULL:
1747 dst = &null;
1748 break;
1749
1750 case TGSI_FILE_OUTPUT:
1751 index = mach->OutputVertexOffset + reg->Register.Index;
1752 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1753 #if 0
1754 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1755 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1756 reg->Register.Index);
1757 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1758 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1759 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1760 if (execmask & (1 << i))
1761 debug_printf("%f, ", chan->f[i]);
1762 debug_printf(")\n");
1763 }
1764 #endif
1765 break;
1766
1767 case TGSI_FILE_TEMPORARY:
1768 index = reg->Register.Index;
1769 assert( index < TGSI_EXEC_NUM_TEMPS );
1770 dst = &mach->Temps[offset + index].xyzw[chan_index];
1771 break;
1772
1773 case TGSI_FILE_ADDRESS:
1774 index = reg->Register.Index;
1775 assert(index >= 0 && index < ARRAY_SIZE(mach->Addrs));
1776 dst = &mach->Addrs[index].xyzw[chan_index];
1777 break;
1778
1779 default:
1780 unreachable("Bad destination file");
1781 }
1782
1783 return dst;
1784 }
1785
1786 static void
store_dest_double(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,uint chan_index)1787 store_dest_double(struct tgsi_exec_machine *mach,
1788 const union tgsi_exec_channel *chan,
1789 const struct tgsi_full_dst_register *reg,
1790 uint chan_index)
1791 {
1792 union tgsi_exec_channel *dst;
1793 const uint execmask = mach->ExecMask;
1794 int i;
1795
1796 dst = store_dest_dstret(mach, chan, reg, chan_index);
1797 if (!dst)
1798 return;
1799
1800 /* doubles path */
1801 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1802 if (execmask & (1 << i))
1803 dst->i[i] = chan->i[i];
1804 }
1805
1806 static void
store_dest(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_index)1807 store_dest(struct tgsi_exec_machine *mach,
1808 const union tgsi_exec_channel *chan,
1809 const struct tgsi_full_dst_register *reg,
1810 const struct tgsi_full_instruction *inst,
1811 uint chan_index)
1812 {
1813 union tgsi_exec_channel *dst;
1814 const uint execmask = mach->ExecMask;
1815 int i;
1816
1817 dst = store_dest_dstret(mach, chan, reg, chan_index);
1818 if (!dst)
1819 return;
1820
1821 if (!inst->Instruction.Saturate) {
1822 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1823 if (execmask & (1 << i))
1824 dst->i[i] = chan->i[i];
1825 }
1826 else {
1827 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1828 if (execmask & (1 << i))
1829 dst->f[i] = fminf(fmaxf(chan->f[i], 0.0f), 1.0f);
1830 }
1831 }
1832
/*
 * Shorthands for fetch_source() on source operand INDEX, channel CHAN,
 * writing into VAL.  FETCH requests TGSI_EXEC_DATA_FLOAT and IFETCH
 * requests TGSI_EXEC_DATA_INT.
 *
 * Both expand to code that names `mach` and `inst`, so those identifiers
 * must be in scope wherever the macros are used.
 */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_INT)
1838
1839
1840 /**
1841 * Execute ARB-style KIL which is predicated by a src register.
1842 * Kill fragment if any of the four values is less than zero.
1843 */
1844 static void
exec_kill_if(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)1845 exec_kill_if(struct tgsi_exec_machine *mach,
1846 const struct tgsi_full_instruction *inst)
1847 {
1848 uint uniquemask;
1849 uint chan_index;
1850 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1851 union tgsi_exec_channel r[1];
1852
1853 /* This mask stores component bits that were already tested. */
1854 uniquemask = 0;
1855
1856 for (chan_index = 0; chan_index < 4; chan_index++)
1857 {
1858 uint swizzle;
1859 uint i;
1860
1861 /* unswizzle channel */
1862 swizzle = tgsi_util_get_full_src_register_swizzle (
1863 &inst->Src[0],
1864 chan_index);
1865
1866 /* check if the component has not been already tested */
1867 if (uniquemask & (1 << swizzle))
1868 continue;
1869 uniquemask |= 1 << swizzle;
1870
1871 FETCH(&r[0], 0, chan_index);
1872 for (i = 0; i < 4; i++)
1873 if (r[0].f[i] < 0.0f)
1874 kilmask |= 1 << i;
1875 }
1876
1877 /* restrict to fragments currently executing */
1878 kilmask &= mach->ExecMask;
1879
1880 mach->KillMask |= kilmask;
1881 }
1882
1883 /**
1884 * Unconditional fragment kill/discard.
1885 */
1886 static void
exec_kill(struct tgsi_exec_machine * mach)1887 exec_kill(struct tgsi_exec_machine *mach)
1888 {
1889 /* kill fragment for all fragments currently executing.
1890 * bit 0 = pixel 0, bit 1 = pixel 1, etc.
1891 */
1892 mach->KillMask |= mach->ExecMask;
1893 }
1894
1895 static void
emit_vertex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)1896 emit_vertex(struct tgsi_exec_machine *mach,
1897 const struct tgsi_full_instruction *inst)
1898 {
1899 union tgsi_exec_channel r[1];
1900 unsigned stream_id;
1901 unsigned prim_count;
1902 /* FIXME: check for exec mask correctly
1903 unsigned i;
1904 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1905 if ((mach->ExecMask & (1 << i)))
1906 */
1907 IFETCH(&r[0], 0, TGSI_CHAN_X);
1908 stream_id = r[0].u[0];
1909 prim_count = mach->OutputPrimCount[stream_id];
1910 if (mach->ExecMask) {
1911 if (mach->Primitives[stream_id][prim_count] >= mach->MaxOutputVertices)
1912 return;
1913
1914 if (mach->Primitives[stream_id][prim_count] == 0)
1915 mach->PrimitiveOffsets[stream_id][prim_count] = mach->OutputVertexOffset;
1916 mach->OutputVertexOffset += mach->NumOutputs;
1917 mach->Primitives[stream_id][prim_count]++;
1918 }
1919 }
1920
1921 static void
emit_primitive(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)1922 emit_primitive(struct tgsi_exec_machine *mach,
1923 const struct tgsi_full_instruction *inst)
1924 {
1925 unsigned *prim_count;
1926 union tgsi_exec_channel r[1];
1927 unsigned stream_id = 0;
1928 /* FIXME: check for exec mask correctly
1929 unsigned i;
1930 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1931 if ((mach->ExecMask & (1 << i)))
1932 */
1933 if (inst) {
1934 IFETCH(&r[0], 0, TGSI_CHAN_X);
1935 stream_id = r[0].u[0];
1936 }
1937 prim_count = &mach->OutputPrimCount[stream_id];
1938 if (mach->ExecMask) {
1939 ++(*prim_count);
1940 debug_assert((*prim_count * mach->NumOutputs) < TGSI_MAX_TOTAL_VERTICES);
1941 mach->Primitives[stream_id][*prim_count] = 0;
1942 }
1943 }
1944
1945 static void
conditional_emit_primitive(struct tgsi_exec_machine * mach)1946 conditional_emit_primitive(struct tgsi_exec_machine *mach)
1947 {
1948 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1949 int emitted_verts = mach->Primitives[0][mach->OutputPrimCount[0]];
1950 if (emitted_verts) {
1951 emit_primitive(mach, NULL);
1952 }
1953 }
1954 }
1955
1956
1957 /*
1958 * Fetch four texture samples using STR texture coordinates.
1959 */
1960 static void
fetch_texel(struct tgsi_sampler * sampler,const unsigned sview_idx,const unsigned sampler_idx,const union tgsi_exec_channel * s,const union tgsi_exec_channel * t,const union tgsi_exec_channel * p,const union tgsi_exec_channel * c0,const union tgsi_exec_channel * c1,float derivs[3][2][TGSI_QUAD_SIZE],const int8_t offset[3],enum tgsi_sampler_control control,union tgsi_exec_channel * r,union tgsi_exec_channel * g,union tgsi_exec_channel * b,union tgsi_exec_channel * a)1961 fetch_texel( struct tgsi_sampler *sampler,
1962 const unsigned sview_idx,
1963 const unsigned sampler_idx,
1964 const union tgsi_exec_channel *s,
1965 const union tgsi_exec_channel *t,
1966 const union tgsi_exec_channel *p,
1967 const union tgsi_exec_channel *c0,
1968 const union tgsi_exec_channel *c1,
1969 float derivs[3][2][TGSI_QUAD_SIZE],
1970 const int8_t offset[3],
1971 enum tgsi_sampler_control control,
1972 union tgsi_exec_channel *r,
1973 union tgsi_exec_channel *g,
1974 union tgsi_exec_channel *b,
1975 union tgsi_exec_channel *a )
1976 {
1977 uint j;
1978 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1979
1980 /* FIXME: handle explicit derivs, offsets */
1981 sampler->get_samples(sampler, sview_idx, sampler_idx,
1982 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
1983
1984 for (j = 0; j < 4; j++) {
1985 r->f[j] = rgba[0][j];
1986 g->f[j] = rgba[1][j];
1987 b->f[j] = rgba[2][j];
1988 a->f[j] = rgba[3][j];
1989 }
1990 }
1991
1992
/*
 * Values for the `modifier` argument of exec_tex() and exec_sample().
 * They select how the optional extra operand of a texture instruction is
 * used and which tgsi_sampler_control mode is requested.
 */
#define TEX_MODIFIER_NONE           0  /* no extra operand */
#define TEX_MODIFIER_PROJECTED      1  /* exec_tex(): divide coords by the operand; rejected by exec_sample() */
#define TEX_MODIFIER_LOD_BIAS       2  /* operand is passed on with TGSI_SAMPLER_LOD_BIAS */
#define TEX_MODIFIER_EXPLICIT_LOD   3  /* operand is passed on with TGSI_SAMPLER_LOD_EXPLICIT */
#define TEX_MODIFIER_LEVEL_ZERO     4  /* exec_sample(): TGSI_SAMPLER_LOD_ZERO; rejected by exec_tex() */
#define TEX_MODIFIER_GATHER         5  /* requests TGSI_SAMPLER_GATHER */
1999
2000 /*
2001 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2002 */
2003 static void
fetch_texel_offsets(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int8_t offsets[3])2004 fetch_texel_offsets(struct tgsi_exec_machine *mach,
2005 const struct tgsi_full_instruction *inst,
2006 int8_t offsets[3])
2007 {
2008 if (inst->Texture.NumOffsets == 1) {
2009 union tgsi_exec_channel index;
2010 union tgsi_exec_channel offset[3];
2011 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
2012 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2013 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
2014 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2015 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
2016 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
2017 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
2018 offsets[0] = offset[0].i[0];
2019 offsets[1] = offset[1].i[0];
2020 offsets[2] = offset[2].i[0];
2021 } else {
2022 assert(inst->Texture.NumOffsets == 0);
2023 offsets[0] = offsets[1] = offsets[2] = 0;
2024 }
2025 }
2026
2027
2028 /*
2029 * Fetch dx and dy values for one channel (s, t or r).
2030 * Put dx values into one float array, dy values into another.
2031 */
2032 static void
fetch_assign_deriv_channel(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,unsigned regdsrcx,unsigned chan,float derivs[2][TGSI_QUAD_SIZE])2033 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
2034 const struct tgsi_full_instruction *inst,
2035 unsigned regdsrcx,
2036 unsigned chan,
2037 float derivs[2][TGSI_QUAD_SIZE])
2038 {
2039 union tgsi_exec_channel d;
2040 FETCH(&d, regdsrcx, chan);
2041 derivs[0][0] = d.f[0];
2042 derivs[0][1] = d.f[1];
2043 derivs[0][2] = d.f[2];
2044 derivs[0][3] = d.f[3];
2045 FETCH(&d, regdsrcx + 1, chan);
2046 derivs[1][0] = d.f[0];
2047 derivs[1][1] = d.f[1];
2048 derivs[1][2] = d.f[2];
2049 derivs[1][3] = d.f[3];
2050 }
2051
2052 static uint
fetch_sampler_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint sampler)2053 fetch_sampler_unit(struct tgsi_exec_machine *mach,
2054 const struct tgsi_full_instruction *inst,
2055 uint sampler)
2056 {
2057 uint unit = 0;
2058 int i;
2059 if (inst->Src[sampler].Register.Indirect) {
2060 const struct tgsi_full_src_register *reg = &inst->Src[sampler];
2061 union tgsi_exec_channel indir_index, index2;
2062 const uint execmask = mach->ExecMask;
2063 index2.i[0] =
2064 index2.i[1] =
2065 index2.i[2] =
2066 index2.i[3] = reg->Indirect.Index;
2067
2068 fetch_src_file_channel(mach,
2069 reg->Indirect.File,
2070 reg->Indirect.Swizzle,
2071 &index2,
2072 &ZeroVec,
2073 &indir_index);
2074 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2075 if (execmask & (1 << i)) {
2076 unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2077 break;
2078 }
2079 }
2080
2081 } else {
2082 unit = inst->Src[sampler].Register.Index;
2083 }
2084 return unit;
2085 }
2086
2087 /*
2088 * execute a texture instruction.
2089 *
2090 * modifier is used to control the channel routing for the
2091 * instruction variants like proj, lod, and texture with lod bias.
2092 * sampler indicates which src register the sampler is contained in.
2093 */
2094 static void
exec_tex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,uint sampler)2095 exec_tex(struct tgsi_exec_machine *mach,
2096 const struct tgsi_full_instruction *inst,
2097 uint modifier, uint sampler)
2098 {
2099 const union tgsi_exec_channel *args[5], *proj = NULL;
2100 union tgsi_exec_channel r[5];
2101 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2102 uint chan;
2103 uint unit;
2104 int8_t offsets[3];
2105 int dim, shadow_ref, i;
2106
2107 unit = fetch_sampler_unit(mach, inst, sampler);
2108 /* always fetch all 3 offsets, overkill but keeps code simple */
2109 fetch_texel_offsets(mach, inst, offsets);
2110
2111 assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2112 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2113
2114 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2115 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2116
2117 assert(dim <= 4);
2118 if (shadow_ref >= 0)
2119 assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));
2120
2121 /* fetch modifier to the last argument */
2122 if (modifier != TEX_MODIFIER_NONE) {
2123 const int last = ARRAY_SIZE(args) - 1;
2124
2125 /* fetch modifier from src0.w or src1.x */
2126 if (sampler == 1) {
2127 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2128 FETCH(&r[last], 0, TGSI_CHAN_W);
2129 }
2130 else {
2131 FETCH(&r[last], 1, TGSI_CHAN_X);
2132 }
2133
2134 if (modifier != TEX_MODIFIER_PROJECTED) {
2135 args[last] = &r[last];
2136 }
2137 else {
2138 proj = &r[last];
2139 args[last] = &ZeroVec;
2140 }
2141
2142 /* point unused arguments to zero vector */
2143 for (i = dim; i < last; i++)
2144 args[i] = &ZeroVec;
2145
2146 if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2147 control = TGSI_SAMPLER_LOD_EXPLICIT;
2148 else if (modifier == TEX_MODIFIER_LOD_BIAS)
2149 control = TGSI_SAMPLER_LOD_BIAS;
2150 else if (modifier == TEX_MODIFIER_GATHER)
2151 control = TGSI_SAMPLER_GATHER;
2152 }
2153 else {
2154 for (i = dim; i < (int)ARRAY_SIZE(args); i++)
2155 args[i] = &ZeroVec;
2156 }
2157
2158 /* fetch coordinates */
2159 for (i = 0; i < dim; i++) {
2160 FETCH(&r[i], 0, TGSI_CHAN_X + i);
2161
2162 if (proj)
2163 micro_div(&r[i], &r[i], proj);
2164
2165 args[i] = &r[i];
2166 }
2167
2168 /* fetch reference value */
2169 if (shadow_ref >= 0) {
2170 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2171
2172 if (proj)
2173 micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2174
2175 args[shadow_ref] = &r[shadow_ref];
2176 }
2177
2178 fetch_texel(mach->Sampler, unit, unit,
2179 args[0], args[1], args[2], args[3], args[4],
2180 NULL, offsets, control,
2181 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2182
2183 #if 0
2184 debug_printf("fetch r: %g %g %g %g\n",
2185 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2186 debug_printf("fetch g: %g %g %g %g\n",
2187 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2188 debug_printf("fetch b: %g %g %g %g\n",
2189 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2190 debug_printf("fetch a: %g %g %g %g\n",
2191 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2192 #endif
2193
2194 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2195 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2196 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2197 }
2198 }
2199 }
2200
2201 static void
exec_lodq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2202 exec_lodq(struct tgsi_exec_machine *mach,
2203 const struct tgsi_full_instruction *inst)
2204 {
2205 uint resource_unit, sampler_unit;
2206 unsigned dim;
2207 unsigned i;
2208 union tgsi_exec_channel coords[4];
2209 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2210 union tgsi_exec_channel r[2];
2211
2212 resource_unit = fetch_sampler_unit(mach, inst, 1);
2213 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2214 uint target = mach->SamplerViews[resource_unit].Resource;
2215 dim = tgsi_util_get_texture_coord_dim(target);
2216 sampler_unit = fetch_sampler_unit(mach, inst, 2);
2217 } else {
2218 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2219 sampler_unit = resource_unit;
2220 }
2221 assert(dim <= ARRAY_SIZE(coords));
2222 /* fetch coordinates */
2223 for (i = 0; i < dim; i++) {
2224 FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2225 args[i] = &coords[i];
2226 }
2227 for (i = dim; i < ARRAY_SIZE(coords); i++) {
2228 args[i] = &ZeroVec;
2229 }
2230 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
2231 args[0]->f,
2232 args[1]->f,
2233 args[2]->f,
2234 args[3]->f,
2235 TGSI_SAMPLER_LOD_NONE,
2236 r[0].f,
2237 r[1].f);
2238
2239 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2240 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
2241 }
2242 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2243 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);
2244 }
2245 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2246 unsigned char swizzles[4];
2247 unsigned chan;
2248 swizzles[0] = inst->Src[1].Register.SwizzleX;
2249 swizzles[1] = inst->Src[1].Register.SwizzleY;
2250 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2251 swizzles[3] = inst->Src[1].Register.SwizzleW;
2252
2253 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2254 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2255 if (swizzles[chan] >= 2) {
2256 store_dest(mach, &ZeroVec,
2257 &inst->Dst[0], inst, chan);
2258 } else {
2259 store_dest(mach, &r[swizzles[chan]],
2260 &inst->Dst[0], inst, chan);
2261 }
2262 }
2263 }
2264 } else {
2265 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2266 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
2267 }
2268 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2269 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);
2270 }
2271 }
2272 }
2273
2274 static void
exec_txd(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2275 exec_txd(struct tgsi_exec_machine *mach,
2276 const struct tgsi_full_instruction *inst)
2277 {
2278 union tgsi_exec_channel r[4];
2279 float derivs[3][2][TGSI_QUAD_SIZE];
2280 uint chan;
2281 uint unit;
2282 int8_t offsets[3];
2283
2284 unit = fetch_sampler_unit(mach, inst, 3);
2285 /* always fetch all 3 offsets, overkill but keeps code simple */
2286 fetch_texel_offsets(mach, inst, offsets);
2287
2288 switch (inst->Texture.Texture) {
2289 case TGSI_TEXTURE_1D:
2290 FETCH(&r[0], 0, TGSI_CHAN_X);
2291
2292 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2293
2294 fetch_texel(mach->Sampler, unit, unit,
2295 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2296 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2297 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2298 break;
2299
2300 case TGSI_TEXTURE_SHADOW1D:
2301 case TGSI_TEXTURE_1D_ARRAY:
2302 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2303 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2304 FETCH(&r[0], 0, TGSI_CHAN_X);
2305 FETCH(&r[1], 0, TGSI_CHAN_Y);
2306 FETCH(&r[2], 0, TGSI_CHAN_Z);
2307
2308 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2309
2310 fetch_texel(mach->Sampler, unit, unit,
2311 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2312 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2313 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2314 break;
2315
2316 case TGSI_TEXTURE_2D:
2317 case TGSI_TEXTURE_RECT:
2318 FETCH(&r[0], 0, TGSI_CHAN_X);
2319 FETCH(&r[1], 0, TGSI_CHAN_Y);
2320
2321 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2322 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2323
2324 fetch_texel(mach->Sampler, unit, unit,
2325 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2326 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2327 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2328 break;
2329
2330
2331 case TGSI_TEXTURE_SHADOW2D:
2332 case TGSI_TEXTURE_SHADOWRECT:
2333 case TGSI_TEXTURE_2D_ARRAY:
2334 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2335 /* only SHADOW2D_ARRAY actually needs W */
2336 FETCH(&r[0], 0, TGSI_CHAN_X);
2337 FETCH(&r[1], 0, TGSI_CHAN_Y);
2338 FETCH(&r[2], 0, TGSI_CHAN_Z);
2339 FETCH(&r[3], 0, TGSI_CHAN_W);
2340
2341 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2342 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2343
2344 fetch_texel(mach->Sampler, unit, unit,
2345 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2346 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2347 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2348 break;
2349
2350 case TGSI_TEXTURE_3D:
2351 case TGSI_TEXTURE_CUBE:
2352 case TGSI_TEXTURE_CUBE_ARRAY:
2353 case TGSI_TEXTURE_SHADOWCUBE:
2354 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2355 FETCH(&r[0], 0, TGSI_CHAN_X);
2356 FETCH(&r[1], 0, TGSI_CHAN_Y);
2357 FETCH(&r[2], 0, TGSI_CHAN_Z);
2358 FETCH(&r[3], 0, TGSI_CHAN_W);
2359
2360 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2361 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2362 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2363
2364 fetch_texel(mach->Sampler, unit, unit,
2365 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2366 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2367 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2368 break;
2369
2370 default:
2371 assert(0);
2372 }
2373
2374 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2375 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2376 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2377 }
2378 }
2379 }
2380
2381
2382 static void
exec_txf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2383 exec_txf(struct tgsi_exec_machine *mach,
2384 const struct tgsi_full_instruction *inst)
2385 {
2386 union tgsi_exec_channel r[4];
2387 uint chan;
2388 uint unit;
2389 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2390 int j;
2391 int8_t offsets[3];
2392 unsigned target;
2393
2394 unit = fetch_sampler_unit(mach, inst, 1);
2395 /* always fetch all 3 offsets, overkill but keeps code simple */
2396 fetch_texel_offsets(mach, inst, offsets);
2397
2398 IFETCH(&r[3], 0, TGSI_CHAN_W);
2399
2400 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2401 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2402 target = mach->SamplerViews[unit].Resource;
2403 }
2404 else {
2405 target = inst->Texture.Texture;
2406 }
2407 switch(target) {
2408 case TGSI_TEXTURE_3D:
2409 case TGSI_TEXTURE_2D_ARRAY:
2410 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2411 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2412 IFETCH(&r[2], 0, TGSI_CHAN_Z);
2413 FALLTHROUGH;
2414 case TGSI_TEXTURE_2D:
2415 case TGSI_TEXTURE_RECT:
2416 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2417 case TGSI_TEXTURE_SHADOW2D:
2418 case TGSI_TEXTURE_SHADOWRECT:
2419 case TGSI_TEXTURE_1D_ARRAY:
2420 case TGSI_TEXTURE_2D_MSAA:
2421 IFETCH(&r[1], 0, TGSI_CHAN_Y);
2422 FALLTHROUGH;
2423 case TGSI_TEXTURE_BUFFER:
2424 case TGSI_TEXTURE_1D:
2425 case TGSI_TEXTURE_SHADOW1D:
2426 IFETCH(&r[0], 0, TGSI_CHAN_X);
2427 break;
2428 default:
2429 assert(0);
2430 break;
2431 }
2432
2433 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2434 offsets, rgba);
2435
2436 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2437 r[0].f[j] = rgba[0][j];
2438 r[1].f[j] = rgba[1][j];
2439 r[2].f[j] = rgba[2][j];
2440 r[3].f[j] = rgba[3][j];
2441 }
2442
2443 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2444 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2445 unsigned char swizzles[4];
2446 swizzles[0] = inst->Src[1].Register.SwizzleX;
2447 swizzles[1] = inst->Src[1].Register.SwizzleY;
2448 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2449 swizzles[3] = inst->Src[1].Register.SwizzleW;
2450
2451 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2452 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2453 store_dest(mach, &r[swizzles[chan]],
2454 &inst->Dst[0], inst, chan);
2455 }
2456 }
2457 }
2458 else {
2459 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2460 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2461 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2462 }
2463 }
2464 }
2465 }
2466
2467 static void
exec_txq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2468 exec_txq(struct tgsi_exec_machine *mach,
2469 const struct tgsi_full_instruction *inst)
2470 {
2471 int result[4];
2472 union tgsi_exec_channel r[4], src;
2473 uint chan;
2474 uint unit;
2475 int i,j;
2476
2477 unit = fetch_sampler_unit(mach, inst, 1);
2478
2479 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2480
2481 /* XXX: This interface can't return per-pixel values */
2482 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2483
2484 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2485 for (j = 0; j < 4; j++) {
2486 r[j].i[i] = result[j];
2487 }
2488 }
2489
2490 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2491 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2492 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2493 }
2494 }
2495 }
2496
2497 static void
exec_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,uint modifier,boolean compare)2498 exec_sample(struct tgsi_exec_machine *mach,
2499 const struct tgsi_full_instruction *inst,
2500 uint modifier, boolean compare)
2501 {
2502 const uint resource_unit = inst->Src[1].Register.Index;
2503 const uint sampler_unit = inst->Src[2].Register.Index;
2504 union tgsi_exec_channel r[5], c1;
2505 const union tgsi_exec_channel *lod = &ZeroVec;
2506 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2507 uint chan;
2508 unsigned char swizzles[4];
2509 int8_t offsets[3];
2510
2511 /* always fetch all 3 offsets, overkill but keeps code simple */
2512 fetch_texel_offsets(mach, inst, offsets);
2513
2514 assert(modifier != TEX_MODIFIER_PROJECTED);
2515
2516 if (modifier != TEX_MODIFIER_NONE) {
2517 if (modifier == TEX_MODIFIER_LOD_BIAS) {
2518 FETCH(&c1, 3, TGSI_CHAN_X);
2519 lod = &c1;
2520 control = TGSI_SAMPLER_LOD_BIAS;
2521 }
2522 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2523 FETCH(&c1, 3, TGSI_CHAN_X);
2524 lod = &c1;
2525 control = TGSI_SAMPLER_LOD_EXPLICIT;
2526 }
2527 else if (modifier == TEX_MODIFIER_GATHER) {
2528 control = TGSI_SAMPLER_GATHER;
2529 }
2530 else {
2531 assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2532 control = TGSI_SAMPLER_LOD_ZERO;
2533 }
2534 }
2535
2536 FETCH(&r[0], 0, TGSI_CHAN_X);
2537
2538 switch (mach->SamplerViews[resource_unit].Resource) {
2539 case TGSI_TEXTURE_1D:
2540 if (compare) {
2541 FETCH(&r[2], 3, TGSI_CHAN_X);
2542 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2543 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2544 NULL, offsets, control,
2545 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2546 }
2547 else {
2548 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2549 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2550 NULL, offsets, control,
2551 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2552 }
2553 break;
2554
2555 case TGSI_TEXTURE_1D_ARRAY:
2556 case TGSI_TEXTURE_2D:
2557 case TGSI_TEXTURE_RECT:
2558 FETCH(&r[1], 0, TGSI_CHAN_Y);
2559 if (compare) {
2560 FETCH(&r[2], 3, TGSI_CHAN_X);
2561 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2562 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2563 NULL, offsets, control,
2564 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2565 }
2566 else {
2567 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2568 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2569 NULL, offsets, control,
2570 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2571 }
2572 break;
2573
2574 case TGSI_TEXTURE_2D_ARRAY:
2575 case TGSI_TEXTURE_3D:
2576 case TGSI_TEXTURE_CUBE:
2577 FETCH(&r[1], 0, TGSI_CHAN_Y);
2578 FETCH(&r[2], 0, TGSI_CHAN_Z);
2579 if(compare) {
2580 FETCH(&r[3], 3, TGSI_CHAN_X);
2581 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2582 &r[0], &r[1], &r[2], &r[3], lod,
2583 NULL, offsets, control,
2584 &r[0], &r[1], &r[2], &r[3]);
2585 }
2586 else {
2587 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2588 &r[0], &r[1], &r[2], &ZeroVec, lod,
2589 NULL, offsets, control,
2590 &r[0], &r[1], &r[2], &r[3]);
2591 }
2592 break;
2593
2594 case TGSI_TEXTURE_CUBE_ARRAY:
2595 FETCH(&r[1], 0, TGSI_CHAN_Y);
2596 FETCH(&r[2], 0, TGSI_CHAN_Z);
2597 FETCH(&r[3], 0, TGSI_CHAN_W);
2598 if(compare) {
2599 FETCH(&r[4], 3, TGSI_CHAN_X);
2600 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2601 &r[0], &r[1], &r[2], &r[3], &r[4],
2602 NULL, offsets, control,
2603 &r[0], &r[1], &r[2], &r[3]);
2604 }
2605 else {
2606 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2607 &r[0], &r[1], &r[2], &r[3], lod,
2608 NULL, offsets, control,
2609 &r[0], &r[1], &r[2], &r[3]);
2610 }
2611 break;
2612
2613
2614 default:
2615 assert(0);
2616 }
2617
2618 swizzles[0] = inst->Src[1].Register.SwizzleX;
2619 swizzles[1] = inst->Src[1].Register.SwizzleY;
2620 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2621 swizzles[3] = inst->Src[1].Register.SwizzleW;
2622
2623 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2624 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2625 store_dest(mach, &r[swizzles[chan]],
2626 &inst->Dst[0], inst, chan);
2627 }
2628 }
2629 }
2630
2631 static void
exec_sample_d(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2632 exec_sample_d(struct tgsi_exec_machine *mach,
2633 const struct tgsi_full_instruction *inst)
2634 {
2635 const uint resource_unit = inst->Src[1].Register.Index;
2636 const uint sampler_unit = inst->Src[2].Register.Index;
2637 union tgsi_exec_channel r[4];
2638 float derivs[3][2][TGSI_QUAD_SIZE];
2639 uint chan;
2640 unsigned char swizzles[4];
2641 int8_t offsets[3];
2642
2643 /* always fetch all 3 offsets, overkill but keeps code simple */
2644 fetch_texel_offsets(mach, inst, offsets);
2645
2646 FETCH(&r[0], 0, TGSI_CHAN_X);
2647
2648 switch (mach->SamplerViews[resource_unit].Resource) {
2649 case TGSI_TEXTURE_1D:
2650 case TGSI_TEXTURE_1D_ARRAY:
2651 /* only 1D array actually needs Y */
2652 FETCH(&r[1], 0, TGSI_CHAN_Y);
2653
2654 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2655
2656 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2657 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2658 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2659 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2660 break;
2661
2662 case TGSI_TEXTURE_2D:
2663 case TGSI_TEXTURE_RECT:
2664 case TGSI_TEXTURE_2D_ARRAY:
2665 /* only 2D array actually needs Z */
2666 FETCH(&r[1], 0, TGSI_CHAN_Y);
2667 FETCH(&r[2], 0, TGSI_CHAN_Z);
2668
2669 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2670 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2671
2672 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2673 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
2674 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2675 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2676 break;
2677
2678 case TGSI_TEXTURE_3D:
2679 case TGSI_TEXTURE_CUBE:
2680 case TGSI_TEXTURE_CUBE_ARRAY:
2681 /* only cube array actually needs W */
2682 FETCH(&r[1], 0, TGSI_CHAN_Y);
2683 FETCH(&r[2], 0, TGSI_CHAN_Z);
2684 FETCH(&r[3], 0, TGSI_CHAN_W);
2685
2686 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2687 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2688 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2689
2690 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2691 &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2692 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2693 &r[0], &r[1], &r[2], &r[3]);
2694 break;
2695
2696 default:
2697 assert(0);
2698 }
2699
2700 swizzles[0] = inst->Src[1].Register.SwizzleX;
2701 swizzles[1] = inst->Src[1].Register.SwizzleY;
2702 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2703 swizzles[3] = inst->Src[1].Register.SwizzleW;
2704
2705 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2706 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2707 store_dest(mach, &r[swizzles[chan]],
2708 &inst->Dst[0], inst, chan);
2709 }
2710 }
2711 }
2712
2713
2714 /**
2715 * Evaluate a constant-valued coefficient at the position of the
2716 * current quad.
2717 */
2718 static void
eval_constant_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2719 eval_constant_coef(
2720 struct tgsi_exec_machine *mach,
2721 unsigned attrib,
2722 unsigned chan )
2723 {
2724 unsigned i;
2725
2726 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2727 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2728 }
2729 }
2730
/**
 * Offset hook for constant-valued coefficients.
 *
 * It has the same parameter list as interp_linear_offset() and
 * interp_perspective_offset() but does nothing: all parameters are
 * unused and out_chan is left untouched.
 */
static void
interp_constant_offset(
   UNUSED const struct tgsi_exec_machine *mach,
   UNUSED unsigned attrib,
   UNUSED unsigned chan,
   UNUSED float ofs_x,
   UNUSED float ofs_y,
   UNUSED union tgsi_exec_channel *out_chan)
{
}
2741
2742 /**
2743 * Evaluate a linear-valued coefficient at the position of the
2744 * current quad.
2745 */
2746 static void
interp_linear_offset(const struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan,float ofs_x,float ofs_y,union tgsi_exec_channel * out_chan)2747 interp_linear_offset(
2748 const struct tgsi_exec_machine *mach,
2749 unsigned attrib,
2750 unsigned chan,
2751 float ofs_x,
2752 float ofs_y,
2753 union tgsi_exec_channel *out_chan)
2754 {
2755 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2756 const float dady = mach->InterpCoefs[attrib].dady[chan];
2757 const float delta = ofs_x * dadx + ofs_y * dady;
2758 out_chan->f[0] += delta;
2759 out_chan->f[1] += delta;
2760 out_chan->f[2] += delta;
2761 out_chan->f[3] += delta;
2762 }
2763
2764 static void
eval_linear_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2765 eval_linear_coef(struct tgsi_exec_machine *mach,
2766 unsigned attrib,
2767 unsigned chan)
2768 {
2769 const float x = mach->QuadPos.xyzw[0].f[0];
2770 const float y = mach->QuadPos.xyzw[1].f[0];
2771 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2772 const float dady = mach->InterpCoefs[attrib].dady[chan];
2773 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2774
2775 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2776 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2777 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2778 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2779 }
2780
2781 /**
2782 * Evaluate a perspective-valued coefficient at the position of the
2783 * current quad.
2784 */
2785
2786 static void
interp_perspective_offset(const struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan,float ofs_x,float ofs_y,union tgsi_exec_channel * out_chan)2787 interp_perspective_offset(
2788 const struct tgsi_exec_machine *mach,
2789 unsigned attrib,
2790 unsigned chan,
2791 float ofs_x,
2792 float ofs_y,
2793 union tgsi_exec_channel *out_chan)
2794 {
2795 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2796 const float dady = mach->InterpCoefs[attrib].dady[chan];
2797 const float *w = mach->QuadPos.xyzw[3].f;
2798 const float delta = ofs_x * dadx + ofs_y * dady;
2799 out_chan->f[0] += delta / w[0];
2800 out_chan->f[1] += delta / w[1];
2801 out_chan->f[2] += delta / w[2];
2802 out_chan->f[3] += delta / w[3];
2803 }
2804
2805 static void
eval_perspective_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2806 eval_perspective_coef(
2807 struct tgsi_exec_machine *mach,
2808 unsigned attrib,
2809 unsigned chan )
2810 {
2811 const float x = mach->QuadPos.xyzw[0].f[0];
2812 const float y = mach->QuadPos.xyzw[1].f[0];
2813 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2814 const float dady = mach->InterpCoefs[attrib].dady[chan];
2815 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2816 const float *w = mach->QuadPos.xyzw[3].f;
2817 /* divide by W here */
2818 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2819 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2820 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2821 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2822 }
2823
2824
/**
 * Signature shared by the eval_*_coef() functions: evaluate channel
 * 'chan' of input attribute 'attrib' for the current quad.
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2829
2830 static void
exec_declaration(struct tgsi_exec_machine * mach,const struct tgsi_full_declaration * decl)2831 exec_declaration(struct tgsi_exec_machine *mach,
2832 const struct tgsi_full_declaration *decl)
2833 {
2834 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2835 mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2836 return;
2837 }
2838
2839 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2840 if (decl->Declaration.File == TGSI_FILE_INPUT) {
2841 uint first, last, mask;
2842
2843 first = decl->Range.First;
2844 last = decl->Range.Last;
2845 mask = decl->Declaration.UsageMask;
2846
2847 /* XXX we could remove this special-case code since
2848 * mach->InterpCoefs[first].a0 should already have the
2849 * front/back-face value. But we should first update the
2850 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2851 * Then, we could remove the tgsi_exec_machine::Face field.
2852 */
2853 /* XXX make FACE a system value */
2854 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2855 uint i;
2856
2857 assert(decl->Semantic.Index == 0);
2858 assert(first == last);
2859
2860 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2861 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2862 }
2863 } else {
2864 eval_coef_func eval;
2865 apply_sample_offset_func interp;
2866 uint i, j;
2867
2868 switch (decl->Interp.Interpolate) {
2869 case TGSI_INTERPOLATE_CONSTANT:
2870 eval = eval_constant_coef;
2871 interp = interp_constant_offset;
2872 break;
2873
2874 case TGSI_INTERPOLATE_LINEAR:
2875 eval = eval_linear_coef;
2876 interp = interp_linear_offset;
2877 break;
2878
2879 case TGSI_INTERPOLATE_PERSPECTIVE:
2880 eval = eval_perspective_coef;
2881 interp = interp_perspective_offset;
2882 break;
2883
2884 case TGSI_INTERPOLATE_COLOR:
2885 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2886 interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
2887 break;
2888
2889 default:
2890 assert(0);
2891 return;
2892 }
2893
2894 for (i = first; i <= last; i++)
2895 mach->InputSampleOffsetApply[i] = interp;
2896
2897 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2898 if (mask & (1 << j)) {
2899 for (i = first; i <= last; i++) {
2900 eval(mach, i, j);
2901 }
2902 }
2903 }
2904 }
2905
2906 if (DEBUG_EXECUTION) {
2907 uint i, j;
2908 for (i = first; i <= last; ++i) {
2909 debug_printf("IN[%2u] = ", i);
2910 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2911 if (j > 0) {
2912 debug_printf(" ");
2913 }
2914 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
2915 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
2916 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
2917 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
2918 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
2919 }
2920 }
2921 }
2922 }
2923 }
2924
2925 }
2926
/** Per-channel operation taking one source channel. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
2929
2930 static void
exec_scalar_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype src_datatype)2931 exec_scalar_unary(struct tgsi_exec_machine *mach,
2932 const struct tgsi_full_instruction *inst,
2933 micro_unary_op op,
2934 enum tgsi_exec_datatype src_datatype)
2935 {
2936 unsigned int chan;
2937 union tgsi_exec_channel src;
2938 union tgsi_exec_channel dst;
2939
2940 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
2941 op(&dst, &src);
2942 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2943 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2944 store_dest(mach, &dst, &inst->Dst[0], inst, chan);
2945 }
2946 }
2947 }
2948
2949 static void
exec_vector_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype src_datatype)2950 exec_vector_unary(struct tgsi_exec_machine *mach,
2951 const struct tgsi_full_instruction *inst,
2952 micro_unary_op op,
2953 enum tgsi_exec_datatype src_datatype)
2954 {
2955 unsigned int chan;
2956 struct tgsi_exec_vector dst;
2957
2958 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2959 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2960 union tgsi_exec_channel src;
2961
2962 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2963 op(&dst.xyzw[chan], &src);
2964 }
2965 }
2966 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2967 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2968 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
2969 }
2970 }
2971 }
2972
/** Per-channel operation taking two source channels. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
2976
2977 static void
exec_scalar_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype src_datatype)2978 exec_scalar_binary(struct tgsi_exec_machine *mach,
2979 const struct tgsi_full_instruction *inst,
2980 micro_binary_op op,
2981 enum tgsi_exec_datatype src_datatype)
2982 {
2983 unsigned int chan;
2984 union tgsi_exec_channel src[2];
2985 union tgsi_exec_channel dst;
2986
2987 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
2988 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
2989 op(&dst, &src[0], &src[1]);
2990 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2991 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2992 store_dest(mach, &dst, &inst->Dst[0], inst, chan);
2993 }
2994 }
2995 }
2996
2997 static void
exec_vector_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype src_datatype)2998 exec_vector_binary(struct tgsi_exec_machine *mach,
2999 const struct tgsi_full_instruction *inst,
3000 micro_binary_op op,
3001 enum tgsi_exec_datatype src_datatype)
3002 {
3003 unsigned int chan;
3004 struct tgsi_exec_vector dst;
3005
3006 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3007 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3008 union tgsi_exec_channel src[2];
3009
3010 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3011 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3012 op(&dst.xyzw[chan], &src[0], &src[1]);
3013 }
3014 }
3015 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3016 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3017 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3018 }
3019 }
3020 }
3021
/** Per-channel operation taking three source channels. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
3026
3027 static void
exec_vector_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_trinary_op op,enum tgsi_exec_datatype src_datatype)3028 exec_vector_trinary(struct tgsi_exec_machine *mach,
3029 const struct tgsi_full_instruction *inst,
3030 micro_trinary_op op,
3031 enum tgsi_exec_datatype src_datatype)
3032 {
3033 unsigned int chan;
3034 struct tgsi_exec_vector dst;
3035
3036 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3037 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3038 union tgsi_exec_channel src[3];
3039
3040 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3041 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3042 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3043 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3044 }
3045 }
3046 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3047 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3048 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3049 }
3050 }
3051 }
3052
/** Per-channel operation taking four source channels. */
typedef void (*micro_quaternary_op)(union tgsi_exec_channel *dst,
                                    const union tgsi_exec_channel *src0,
                                    const union tgsi_exec_channel *src1,
                                    const union tgsi_exec_channel *src2,
                                    const union tgsi_exec_channel *src3);
3058
3059 static void
exec_vector_quaternary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_quaternary_op op,enum tgsi_exec_datatype src_datatype)3060 exec_vector_quaternary(struct tgsi_exec_machine *mach,
3061 const struct tgsi_full_instruction *inst,
3062 micro_quaternary_op op,
3063 enum tgsi_exec_datatype src_datatype)
3064 {
3065 unsigned int chan;
3066 struct tgsi_exec_vector dst;
3067
3068 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3069 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3070 union tgsi_exec_channel src[4];
3071
3072 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3073 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3074 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3075 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3076 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3077 }
3078 }
3079 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3080 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3081 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3082 }
3083 }
3084 }
3085
3086 static void
exec_dp3(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3087 exec_dp3(struct tgsi_exec_machine *mach,
3088 const struct tgsi_full_instruction *inst)
3089 {
3090 unsigned int chan;
3091 union tgsi_exec_channel arg[3];
3092
3093 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3094 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3095 micro_mul(&arg[2], &arg[0], &arg[1]);
3096
3097 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3098 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3099 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3100 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3101 }
3102
3103 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3104 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3105 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3106 }
3107 }
3108 }
3109
3110 static void
exec_dp4(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3111 exec_dp4(struct tgsi_exec_machine *mach,
3112 const struct tgsi_full_instruction *inst)
3113 {
3114 unsigned int chan;
3115 union tgsi_exec_channel arg[3];
3116
3117 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3118 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3119 micro_mul(&arg[2], &arg[0], &arg[1]);
3120
3121 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3122 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3123 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3124 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3125 }
3126
3127 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3128 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3129 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3130 }
3131 }
3132 }
3133
3134 static void
exec_dp2(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3135 exec_dp2(struct tgsi_exec_machine *mach,
3136 const struct tgsi_full_instruction *inst)
3137 {
3138 unsigned int chan;
3139 union tgsi_exec_channel arg[3];
3140
3141 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3142 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3143 micro_mul(&arg[2], &arg[0], &arg[1]);
3144
3145 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3146 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3147 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3148
3149 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3150 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3151 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3152 }
3153 }
3154 }
3155
3156 static void
exec_pk2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3157 exec_pk2h(struct tgsi_exec_machine *mach,
3158 const struct tgsi_full_instruction *inst)
3159 {
3160 unsigned chan;
3161 union tgsi_exec_channel arg[2], dst;
3162
3163 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3164 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3165 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3166 dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) |
3167 (_mesa_float_to_half(arg[1].f[chan]) << 16);
3168 }
3169 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3170 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3171 store_dest(mach, &dst, &inst->Dst[0], inst, chan);
3172 }
3173 }
3174 }
3175
3176 static void
exec_up2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3177 exec_up2h(struct tgsi_exec_machine *mach,
3178 const struct tgsi_full_instruction *inst)
3179 {
3180 unsigned chan;
3181 union tgsi_exec_channel arg, dst[2];
3182
3183 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3184 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3185 dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff);
3186 dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16);
3187 }
3188 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3189 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3190 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan);
3191 }
3192 }
3193 }
3194
3195 static void
micro_ucmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)3196 micro_ucmp(union tgsi_exec_channel *dst,
3197 const union tgsi_exec_channel *src0,
3198 const union tgsi_exec_channel *src1,
3199 const union tgsi_exec_channel *src2)
3200 {
3201 dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
3202 dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
3203 dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
3204 dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
3205 }
3206
3207 static void
exec_ucmp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3208 exec_ucmp(struct tgsi_exec_machine *mach,
3209 const struct tgsi_full_instruction *inst)
3210 {
3211 unsigned int chan;
3212 struct tgsi_exec_vector dst;
3213
3214 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3215 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3216 union tgsi_exec_channel src[3];
3217
3218 fetch_source(mach, &src[0], &inst->Src[0], chan,
3219 TGSI_EXEC_DATA_UINT);
3220 fetch_source(mach, &src[1], &inst->Src[1], chan,
3221 TGSI_EXEC_DATA_FLOAT);
3222 fetch_source(mach, &src[2], &inst->Src[2], chan,
3223 TGSI_EXEC_DATA_FLOAT);
3224 micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3225 }
3226 }
3227 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3228 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3229 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3230 }
3231 }
3232 }
3233
3234 static void
exec_dst(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3235 exec_dst(struct tgsi_exec_machine *mach,
3236 const struct tgsi_full_instruction *inst)
3237 {
3238 union tgsi_exec_channel r[2];
3239 union tgsi_exec_channel d[4];
3240
3241 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3242 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3243 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3244 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3245 }
3246 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3247 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3248 }
3249 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3250 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3251 }
3252
3253 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3254 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);
3255 }
3256 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3257 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);
3258 }
3259 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3260 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);
3261 }
3262 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3263 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W);
3264 }
3265 }
3266
3267 static void
exec_log(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3268 exec_log(struct tgsi_exec_machine *mach,
3269 const struct tgsi_full_instruction *inst)
3270 {
3271 union tgsi_exec_channel r[3];
3272
3273 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3274 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
3275 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
3276 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
3277 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3278 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
3279 }
3280 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3281 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
3282 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3283 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y);
3284 }
3285 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3286 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z);
3287 }
3288 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3289 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3290 }
3291 }
3292
3293 static void
exec_exp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3294 exec_exp(struct tgsi_exec_machine *mach,
3295 const struct tgsi_full_instruction *inst)
3296 {
3297 union tgsi_exec_channel r[3];
3298
3299 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3300 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
3301 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3302 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
3303 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X);
3304 }
3305 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3306 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3307 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y);
3308 }
3309 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3310 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
3311 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z);
3312 }
3313 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3314 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3315 }
3316 }
3317
3318 static void
exec_lit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3319 exec_lit(struct tgsi_exec_machine *mach,
3320 const struct tgsi_full_instruction *inst)
3321 {
3322 union tgsi_exec_channel r[3];
3323 union tgsi_exec_channel d[3];
3324
3325 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3326 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3327 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3328 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3329 micro_max(&r[1], &r[1], &ZeroVec);
3330
3331 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3332 micro_min(&r[2], &r[2], &P128Vec);
3333 micro_max(&r[2], &r[2], &M128Vec);
3334 micro_pow(&r[1], &r[1], &r[2]);
3335 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3336 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);
3337 }
3338 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3339 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3340 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);
3341 }
3342 }
3343 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3344 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);
3345 }
3346
3347 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3348 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3349 }
3350 }
3351
3352 static void
exec_break(struct tgsi_exec_machine * mach)3353 exec_break(struct tgsi_exec_machine *mach)
3354 {
3355 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3356 /* turn off loop channels for each enabled exec channel */
3357 mach->LoopMask &= ~mach->ExecMask;
3358 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3359 UPDATE_EXEC_MASK(mach);
3360 } else {
3361 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3362
3363 mach->Switch.mask = 0x0;
3364
3365 UPDATE_EXEC_MASK(mach);
3366 }
3367 }
3368
3369 static void
exec_switch(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3370 exec_switch(struct tgsi_exec_machine *mach,
3371 const struct tgsi_full_instruction *inst)
3372 {
3373 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3374 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3375
3376 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3377 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3378 mach->Switch.mask = 0x0;
3379 mach->Switch.defaultMask = 0x0;
3380
3381 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3382 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3383
3384 UPDATE_EXEC_MASK(mach);
3385 }
3386
3387 static void
exec_case(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3388 exec_case(struct tgsi_exec_machine *mach,
3389 const struct tgsi_full_instruction *inst)
3390 {
3391 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3392 union tgsi_exec_channel src;
3393 uint mask = 0;
3394
3395 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3396
3397 if (mach->Switch.selector.u[0] == src.u[0]) {
3398 mask |= 0x1;
3399 }
3400 if (mach->Switch.selector.u[1] == src.u[1]) {
3401 mask |= 0x2;
3402 }
3403 if (mach->Switch.selector.u[2] == src.u[2]) {
3404 mask |= 0x4;
3405 }
3406 if (mach->Switch.selector.u[3] == src.u[3]) {
3407 mask |= 0x8;
3408 }
3409
3410 mach->Switch.defaultMask |= mask;
3411
3412 mach->Switch.mask |= mask & prevMask;
3413
3414 UPDATE_EXEC_MASK(mach);
3415 }
3416
3417 /* FIXME: this will only work if default is last */
3418 static void
exec_default(struct tgsi_exec_machine * mach)3419 exec_default(struct tgsi_exec_machine *mach)
3420 {
3421 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3422
3423 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3424
3425 UPDATE_EXEC_MASK(mach);
3426 }
3427
3428 static void
exec_endswitch(struct tgsi_exec_machine * mach)3429 exec_endswitch(struct tgsi_exec_machine *mach)
3430 {
3431 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3432 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3433
3434 UPDATE_EXEC_MASK(mach);
3435 }
3436
/** Operation producing a 64-bit channel from 64-bit source(s). */
typedef void (*micro_dop)(union tgsi_double_channel *dst,
                          const union tgsi_double_channel *src);

/** Operation combining a 64-bit source with a 32-bit source. */
typedef void (*micro_dop_sop)(union tgsi_double_channel *dst,
                              const union tgsi_double_channel *src0,
                              union tgsi_exec_channel *src1);

/** Operation producing a 64-bit channel from a 32-bit source. */
typedef void (*micro_dop_s)(union tgsi_double_channel *dst,
                            const union tgsi_exec_channel *src);

/** Operation producing a 32-bit channel from a 64-bit source. */
typedef void (*micro_sop_d)(union tgsi_exec_channel *dst,
                            const union tgsi_double_channel *src);
3449
3450 static void
fetch_double_channel(struct tgsi_exec_machine * mach,union tgsi_double_channel * chan,const struct tgsi_full_src_register * reg,uint chan_0,uint chan_1)3451 fetch_double_channel(struct tgsi_exec_machine *mach,
3452 union tgsi_double_channel *chan,
3453 const struct tgsi_full_src_register *reg,
3454 uint chan_0,
3455 uint chan_1)
3456 {
3457 union tgsi_exec_channel src[2];
3458 uint i;
3459
3460 fetch_source_d(mach, &src[0], reg, chan_0);
3461 fetch_source_d(mach, &src[1], reg, chan_1);
3462
3463 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3464 chan->u[i][0] = src[0].u[i];
3465 chan->u[i][1] = src[1].u[i];
3466 }
3467 assert(!reg->Register.Absolute);
3468 assert(!reg->Register.Negate);
3469 }
3470
3471 static void
store_double_channel(struct tgsi_exec_machine * mach,const union tgsi_double_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,uint chan_0,uint chan_1)3472 store_double_channel(struct tgsi_exec_machine *mach,
3473 const union tgsi_double_channel *chan,
3474 const struct tgsi_full_dst_register *reg,
3475 const struct tgsi_full_instruction *inst,
3476 uint chan_0,
3477 uint chan_1)
3478 {
3479 union tgsi_exec_channel dst[2];
3480 uint i;
3481 union tgsi_double_channel temp;
3482 const uint execmask = mach->ExecMask;
3483
3484 if (!inst->Instruction.Saturate) {
3485 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3486 if (execmask & (1 << i)) {
3487 dst[0].u[i] = chan->u[i][0];
3488 dst[1].u[i] = chan->u[i][1];
3489 }
3490 }
3491 else {
3492 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3493 if (execmask & (1 << i)) {
3494 if (chan->d[i] < 0.0 || isnan(chan->d[i]))
3495 temp.d[i] = 0.0;
3496 else if (chan->d[i] > 1.0)
3497 temp.d[i] = 1.0;
3498 else
3499 temp.d[i] = chan->d[i];
3500
3501 dst[0].u[i] = temp.u[i][0];
3502 dst[1].u[i] = temp.u[i][1];
3503 }
3504 }
3505
3506 store_dest_double(mach, &dst[0], reg, chan_0);
3507 if (chan_1 != (unsigned)-1)
3508 store_dest_double(mach, &dst[1], reg, chan_1);
3509 }
3510
3511 static void
exec_double_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3512 exec_double_unary(struct tgsi_exec_machine *mach,
3513 const struct tgsi_full_instruction *inst,
3514 micro_dop op)
3515 {
3516 union tgsi_double_channel src;
3517 union tgsi_double_channel dst;
3518
3519 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3520 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3521 op(&dst, &src);
3522 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3523 }
3524 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3525 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3526 op(&dst, &src);
3527 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3528 }
3529 }
3530
3531 static void
exec_double_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op,enum tgsi_exec_datatype dst_datatype)3532 exec_double_binary(struct tgsi_exec_machine *mach,
3533 const struct tgsi_full_instruction *inst,
3534 micro_dop op,
3535 enum tgsi_exec_datatype dst_datatype)
3536 {
3537 union tgsi_double_channel src[2];
3538 union tgsi_double_channel dst;
3539 int first_dest_chan, second_dest_chan;
3540 int wmask;
3541
3542 wmask = inst->Dst[0].Register.WriteMask;
3543 /* these are & because of the way DSLT etc store their destinations */
3544 if (wmask & TGSI_WRITEMASK_XY) {
3545 first_dest_chan = TGSI_CHAN_X;
3546 second_dest_chan = TGSI_CHAN_Y;
3547 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3548 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3549 second_dest_chan = -1;
3550 }
3551
3552 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3553 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3554 op(&dst, src);
3555 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3556 }
3557
3558 if (wmask & TGSI_WRITEMASK_ZW) {
3559 first_dest_chan = TGSI_CHAN_Z;
3560 second_dest_chan = TGSI_CHAN_W;
3561 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3562 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3563 second_dest_chan = -1;
3564 }
3565
3566 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3567 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3568 op(&dst, src);
3569 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3570 }
3571 }
3572
3573 static void
exec_double_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3574 exec_double_trinary(struct tgsi_exec_machine *mach,
3575 const struct tgsi_full_instruction *inst,
3576 micro_dop op)
3577 {
3578 union tgsi_double_channel src[3];
3579 union tgsi_double_channel dst;
3580
3581 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3582 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3583 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3584 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3585 op(&dst, src);
3586 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3587 }
3588 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3589 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3590 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3591 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3592 op(&dst, src);
3593 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3594 }
3595 }
3596
3597 static void
exec_dldexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3598 exec_dldexp(struct tgsi_exec_machine *mach,
3599 const struct tgsi_full_instruction *inst)
3600 {
3601 union tgsi_double_channel src0;
3602 union tgsi_exec_channel src1;
3603 union tgsi_double_channel dst;
3604 int wmask;
3605
3606 wmask = inst->Dst[0].Register.WriteMask;
3607 if (wmask & TGSI_WRITEMASK_XY) {
3608 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3609 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3610 micro_dldexp(&dst, &src0, &src1);
3611 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3612 }
3613
3614 if (wmask & TGSI_WRITEMASK_ZW) {
3615 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3616 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3617 micro_dldexp(&dst, &src0, &src1);
3618 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3619 }
3620 }
3621
3622 static void
exec_dfracexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3623 exec_dfracexp(struct tgsi_exec_machine *mach,
3624 const struct tgsi_full_instruction *inst)
3625 {
3626 union tgsi_double_channel src;
3627 union tgsi_double_channel dst;
3628 union tgsi_exec_channel dst_exp;
3629
3630 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3631 micro_dfracexp(&dst, &dst_exp, &src);
3632 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY)
3633 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3634 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW)
3635 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3636 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3637 if (inst->Dst[1].Register.WriteMask & (1 << chan))
3638 store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan);
3639 }
3640 }
3641
3642 static void
exec_arg0_64_arg1_32(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_sop op)3643 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3644 const struct tgsi_full_instruction *inst,
3645 micro_dop_sop op)
3646 {
3647 union tgsi_double_channel src0;
3648 union tgsi_exec_channel src1;
3649 union tgsi_double_channel dst;
3650 int wmask;
3651
3652 wmask = inst->Dst[0].Register.WriteMask;
3653 if (wmask & TGSI_WRITEMASK_XY) {
3654 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3655 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3656 op(&dst, &src0, &src1);
3657 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3658 }
3659
3660 if (wmask & TGSI_WRITEMASK_ZW) {
3661 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3662 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3663 op(&dst, &src0, &src1);
3664 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3665 }
3666 }
3667
3668 static int
get_image_coord_dim(unsigned tgsi_tex)3669 get_image_coord_dim(unsigned tgsi_tex)
3670 {
3671 int dim;
3672 switch (tgsi_tex) {
3673 case TGSI_TEXTURE_BUFFER:
3674 case TGSI_TEXTURE_1D:
3675 dim = 1;
3676 break;
3677 case TGSI_TEXTURE_2D:
3678 case TGSI_TEXTURE_RECT:
3679 case TGSI_TEXTURE_1D_ARRAY:
3680 case TGSI_TEXTURE_2D_MSAA:
3681 dim = 2;
3682 break;
3683 case TGSI_TEXTURE_3D:
3684 case TGSI_TEXTURE_CUBE:
3685 case TGSI_TEXTURE_2D_ARRAY:
3686 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3687 case TGSI_TEXTURE_CUBE_ARRAY:
3688 dim = 3;
3689 break;
3690 default:
3691 assert(!"unknown texture target");
3692 dim = 0;
3693 break;
3694 }
3695
3696 return dim;
3697 }
3698
3699 static int
get_image_coord_sample(unsigned tgsi_tex)3700 get_image_coord_sample(unsigned tgsi_tex)
3701 {
3702 int sample = 0;
3703 switch (tgsi_tex) {
3704 case TGSI_TEXTURE_2D_MSAA:
3705 sample = 3;
3706 break;
3707 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3708 sample = 4;
3709 break;
3710 default:
3711 break;
3712 }
3713 return sample;
3714 }
3715
3716 static void
exec_load_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3717 exec_load_img(struct tgsi_exec_machine *mach,
3718 const struct tgsi_full_instruction *inst)
3719 {
3720 union tgsi_exec_channel r[4], sample_r;
3721 uint unit;
3722 int sample;
3723 int i, j;
3724 int dim;
3725 uint chan;
3726 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3727 struct tgsi_image_params params;
3728
3729 unit = fetch_sampler_unit(mach, inst, 0);
3730 dim = get_image_coord_dim(inst->Memory.Texture);
3731 sample = get_image_coord_sample(inst->Memory.Texture);
3732 assert(dim <= 3);
3733
3734 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3735 params.unit = unit;
3736 params.tgsi_tex_instr = inst->Memory.Texture;
3737 params.format = inst->Memory.Format;
3738
3739 for (i = 0; i < dim; i++) {
3740 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3741 }
3742
3743 if (sample)
3744 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3745
3746 mach->Image->load(mach->Image, ¶ms,
3747 r[0].i, r[1].i, r[2].i, sample_r.i,
3748 rgba);
3749 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3750 r[0].f[j] = rgba[0][j];
3751 r[1].f[j] = rgba[1][j];
3752 r[2].f[j] = rgba[2][j];
3753 r[3].f[j] = rgba[3][j];
3754 }
3755 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3756 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3757 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
3758 }
3759 }
3760 }
3761
3762 static void
exec_load_membuf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3763 exec_load_membuf(struct tgsi_exec_machine *mach,
3764 const struct tgsi_full_instruction *inst)
3765 {
3766 uint32_t unit = fetch_sampler_unit(mach, inst, 0);
3767
3768 uint32_t size;
3769 const char *ptr;
3770 switch (inst->Src[0].Register.File) {
3771 case TGSI_FILE_MEMORY:
3772 ptr = mach->LocalMem;
3773 size = mach->LocalMemSize;
3774 break;
3775
3776 case TGSI_FILE_BUFFER:
3777 ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);
3778 break;
3779
3780 case TGSI_FILE_CONSTANT:
3781 if (unit < ARRAY_SIZE(mach->Consts)) {
3782 ptr = mach->Consts[unit];
3783 size = mach->ConstsSize[unit];
3784 } else {
3785 ptr = NULL;
3786 size = 0;
3787 }
3788 break;
3789
3790 default:
3791 unreachable("unsupported TGSI_OPCODE_LOAD file");
3792 }
3793
3794 union tgsi_exec_channel offset;
3795 IFETCH(&offset, 1, TGSI_CHAN_X);
3796
3797 assert(inst->Dst[0].Register.WriteMask);
3798 uint32_t load_size = util_last_bit(inst->Dst[0].Register.WriteMask) * 4;
3799
3800 union tgsi_exec_channel rgba[TGSI_NUM_CHANNELS];
3801 memset(&rgba, 0, sizeof(rgba));
3802 for (int j = 0; j < TGSI_QUAD_SIZE; j++) {
3803 if (size >= load_size && offset.u[j] <= (size - load_size)) {
3804 for (int chan = 0; chan < load_size / 4; chan++)
3805 rgba[chan].u[j] = *(uint32_t *)(ptr + offset.u[j] + chan * 4);
3806 }
3807 }
3808
3809 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3810 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3811 store_dest(mach, &rgba[chan], &inst->Dst[0], inst, chan);
3812 }
3813 }
3814 }
3815
3816 static void
exec_load(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3817 exec_load(struct tgsi_exec_machine *mach,
3818 const struct tgsi_full_instruction *inst)
3819 {
3820 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
3821 exec_load_img(mach, inst);
3822 else
3823 exec_load_membuf(mach, inst);
3824 }
3825
3826 static uint
fetch_store_img_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_dst_register * dst)3827 fetch_store_img_unit(struct tgsi_exec_machine *mach,
3828 const struct tgsi_full_dst_register *dst)
3829 {
3830 uint unit = 0;
3831 int i;
3832 if (dst->Register.Indirect) {
3833 union tgsi_exec_channel indir_index, index2;
3834 const uint execmask = mach->ExecMask;
3835 index2.i[0] =
3836 index2.i[1] =
3837 index2.i[2] =
3838 index2.i[3] = dst->Indirect.Index;
3839
3840 fetch_src_file_channel(mach,
3841 dst->Indirect.File,
3842 dst->Indirect.Swizzle,
3843 &index2,
3844 &ZeroVec,
3845 &indir_index);
3846 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3847 if (execmask & (1 << i)) {
3848 unit = dst->Register.Index + indir_index.i[i];
3849 break;
3850 }
3851 }
3852 } else {
3853 unit = dst->Register.Index;
3854 }
3855 return unit;
3856 }
3857
3858 static void
exec_store_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3859 exec_store_img(struct tgsi_exec_machine *mach,
3860 const struct tgsi_full_instruction *inst)
3861 {
3862 union tgsi_exec_channel r[3], sample_r;
3863 union tgsi_exec_channel value[4];
3864 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3865 struct tgsi_image_params params;
3866 int dim;
3867 int sample;
3868 int i, j;
3869 uint unit;
3870 unit = fetch_store_img_unit(mach, &inst->Dst[0]);
3871 dim = get_image_coord_dim(inst->Memory.Texture);
3872 sample = get_image_coord_sample(inst->Memory.Texture);
3873 assert(dim <= 3);
3874
3875 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3876 params.unit = unit;
3877 params.tgsi_tex_instr = inst->Memory.Texture;
3878 params.format = inst->Memory.Format;
3879
3880 for (i = 0; i < dim; i++) {
3881 IFETCH(&r[i], 0, TGSI_CHAN_X + i);
3882 }
3883
3884 for (i = 0; i < 4; i++) {
3885 FETCH(&value[i], 1, TGSI_CHAN_X + i);
3886 }
3887 if (sample)
3888 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
3889
3890 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3891 rgba[0][j] = value[0].f[j];
3892 rgba[1][j] = value[1].f[j];
3893 rgba[2][j] = value[2].f[j];
3894 rgba[3][j] = value[3].f[j];
3895 }
3896
3897 mach->Image->store(mach->Image, ¶ms,
3898 r[0].i, r[1].i, r[2].i, sample_r.i,
3899 rgba);
3900 }
3901
3902 static void
exec_store_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3903 exec_store_buf(struct tgsi_exec_machine *mach,
3904 const struct tgsi_full_instruction *inst)
3905 {
3906 uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]);
3907 uint32_t size;
3908 char *ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);
3909
3910 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3911
3912 union tgsi_exec_channel offset;
3913 IFETCH(&offset, 0, TGSI_CHAN_X);
3914
3915 union tgsi_exec_channel value[4];
3916 for (int i = 0; i < 4; i++)
3917 FETCH(&value[i], 1, TGSI_CHAN_X + i);
3918
3919 for (int j = 0; j < TGSI_QUAD_SIZE; j++) {
3920 if (!(execmask & (1 << j)))
3921 continue;
3922 if (size < offset.u[j])
3923 continue;
3924
3925 uint32_t *invocation_ptr = (uint32_t *)(ptr + offset.u[j]);
3926 uint32_t size_avail = size - offset.u[j];
3927
3928 for (int chan = 0; chan < MIN2(4, size_avail / 4); chan++) {
3929 if (inst->Dst[0].Register.WriteMask & (1 << chan))
3930 memcpy(&invocation_ptr[chan], &value[chan].u[j], 4);
3931 }
3932 }
3933 }
3934
3935 static void
exec_store_mem(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3936 exec_store_mem(struct tgsi_exec_machine *mach,
3937 const struct tgsi_full_instruction *inst)
3938 {
3939 union tgsi_exec_channel r[3];
3940 union tgsi_exec_channel value[4];
3941 uint i, chan;
3942 char *ptr = mach->LocalMem;
3943 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3944
3945 IFETCH(&r[0], 0, TGSI_CHAN_X);
3946
3947 for (i = 0; i < 4; i++) {
3948 FETCH(&value[i], 1, TGSI_CHAN_X + i);
3949 }
3950
3951 if (r[0].u[0] >= mach->LocalMemSize)
3952 return;
3953 ptr += r[0].u[0];
3954
3955 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3956 if (execmask & (1 << i)) {
3957 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3958 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3959 memcpy(ptr + (chan * 4), &value[chan].u[0], 4);
3960 }
3961 }
3962 }
3963 }
3964 }
3965
3966 static void
exec_store(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3967 exec_store(struct tgsi_exec_machine *mach,
3968 const struct tgsi_full_instruction *inst)
3969 {
3970 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
3971 exec_store_img(mach, inst);
3972 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER)
3973 exec_store_buf(mach, inst);
3974 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY)
3975 exec_store_mem(mach, inst);
3976 }
3977
3978 static void
exec_atomop_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3979 exec_atomop_img(struct tgsi_exec_machine *mach,
3980 const struct tgsi_full_instruction *inst)
3981 {
3982 union tgsi_exec_channel r[4], sample_r;
3983 union tgsi_exec_channel value[4], value2[4];
3984 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3985 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3986 struct tgsi_image_params params;
3987 int dim;
3988 int sample;
3989 int i, j;
3990 uint unit, chan;
3991 unit = fetch_sampler_unit(mach, inst, 0);
3992 dim = get_image_coord_dim(inst->Memory.Texture);
3993 sample = get_image_coord_sample(inst->Memory.Texture);
3994 assert(dim <= 3);
3995
3996 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3997 params.unit = unit;
3998 params.tgsi_tex_instr = inst->Memory.Texture;
3999 params.format = inst->Memory.Format;
4000
4001 for (i = 0; i < dim; i++) {
4002 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
4003 }
4004
4005 for (i = 0; i < 4; i++) {
4006 FETCH(&value[i], 2, TGSI_CHAN_X + i);
4007 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4008 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
4009 }
4010 if (sample)
4011 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
4012
4013 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4014 rgba[0][j] = value[0].f[j];
4015 rgba[1][j] = value[1].f[j];
4016 rgba[2][j] = value[2].f[j];
4017 rgba[3][j] = value[3].f[j];
4018 }
4019 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
4020 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4021 rgba2[0][j] = value2[0].f[j];
4022 rgba2[1][j] = value2[1].f[j];
4023 rgba2[2][j] = value2[2].f[j];
4024 rgba2[3][j] = value2[3].f[j];
4025 }
4026 }
4027
4028 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode,
4029 r[0].i, r[1].i, r[2].i, sample_r.i,
4030 rgba, rgba2);
4031
4032 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
4033 r[0].f[j] = rgba[0][j];
4034 r[1].f[j] = rgba[1][j];
4035 r[2].f[j] = rgba[2][j];
4036 r[3].f[j] = rgba[3][j];
4037 }
4038 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4039 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4040 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
4041 }
4042 }
4043 }
4044
4045 static void
exec_atomop_membuf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4046 exec_atomop_membuf(struct tgsi_exec_machine *mach,
4047 const struct tgsi_full_instruction *inst)
4048 {
4049 union tgsi_exec_channel offset, r0, r1;
4050 uint chan, i;
4051 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
4052 IFETCH(&offset, 1, TGSI_CHAN_X);
4053
4054 if (!(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X))
4055 return;
4056
4057 void *ptr[TGSI_QUAD_SIZE];
4058 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
4059 uint32_t unit = fetch_sampler_unit(mach, inst, 0);
4060 uint32_t size;
4061 char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size);
4062 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
4063 if (likely(size >= 4 && offset.u[i] <= size - 4))
4064 ptr[i] = buffer + offset.u[i];
4065 else
4066 ptr[i] = NULL;
4067 }
4068 } else {
4069 assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY);
4070
4071 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4072 if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4))
4073 ptr[i] = (char *)mach->LocalMem + offset.u[i];
4074 else
4075 ptr[i] = NULL;
4076 }
4077 }
4078
4079 FETCH(&r0, 2, TGSI_CHAN_X);
4080 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
4081 FETCH(&r1, 3, TGSI_CHAN_X);
4082
4083 /* The load/op/store sequence has to happen inside the loop since ptr
4084 * may have the same ptr in some of the invocations.
4085 */
4086 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
4087 if (!(execmask & (1 << i)))
4088 continue;
4089
4090 uint32_t val = 0;
4091 if (ptr[i]) {
4092 memcpy(&val, ptr[i], sizeof(val));
4093
4094 uint32_t result;
4095 switch (inst->Instruction.Opcode) {
4096 case TGSI_OPCODE_ATOMUADD:
4097 result = val + r0.u[i];
4098 break;
4099 case TGSI_OPCODE_ATOMXOR:
4100 result = val ^ r0.u[i];
4101 break;
4102 case TGSI_OPCODE_ATOMOR:
4103 result = val | r0.u[i];
4104 break;
4105 case TGSI_OPCODE_ATOMAND:
4106 result = val & r0.u[i];
4107 break;
4108 case TGSI_OPCODE_ATOMUMIN:
4109 result = MIN2(val, r0.u[i]);
4110 break;
4111 case TGSI_OPCODE_ATOMUMAX:
4112 result = MAX2(val, r0.u[i]);
4113 break;
4114 case TGSI_OPCODE_ATOMIMIN:
4115 result = MIN2((int32_t)val, r0.i[i]);
4116 break;
4117 case TGSI_OPCODE_ATOMIMAX:
4118 result = MAX2((int32_t)val, r0.i[i]);
4119 break;
4120 case TGSI_OPCODE_ATOMXCHG:
4121 result = r0.u[i];
4122 break;
4123 case TGSI_OPCODE_ATOMCAS:
4124 if (val == r0.u[i])
4125 result = r1.u[i];
4126 else
4127 result = val;
4128 break;
4129 case TGSI_OPCODE_ATOMFADD:
4130 result = fui(uif(val) + r0.f[i]);
4131 break;
4132 default:
4133 unreachable("bad atomic op");
4134 }
4135 memcpy(ptr[i], &result, sizeof(result));
4136 }
4137
4138 r0.u[i] = val;
4139 }
4140
4141 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
4142 store_dest(mach, &r0, &inst->Dst[0], inst, chan);
4143 }
4144
4145 static void
exec_atomop(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4146 exec_atomop(struct tgsi_exec_machine *mach,
4147 const struct tgsi_full_instruction *inst)
4148 {
4149 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4150 exec_atomop_img(mach, inst);
4151 else
4152 exec_atomop_membuf(mach, inst);
4153 }
4154
4155 static void
exec_resq_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4156 exec_resq_img(struct tgsi_exec_machine *mach,
4157 const struct tgsi_full_instruction *inst)
4158 {
4159 int result[4];
4160 union tgsi_exec_channel r[4];
4161 uint unit;
4162 int i, chan, j;
4163 struct tgsi_image_params params;
4164
4165 unit = fetch_sampler_unit(mach, inst, 0);
4166
4167 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
4168 params.unit = unit;
4169 params.tgsi_tex_instr = inst->Memory.Texture;
4170 params.format = inst->Memory.Format;
4171
4172 mach->Image->get_dims(mach->Image, ¶ms, result);
4173
4174 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4175 for (j = 0; j < 4; j++) {
4176 r[j].i[i] = result[j];
4177 }
4178 }
4179
4180 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4181 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4182 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
4183 }
4184 }
4185 }
4186
4187 static void
exec_resq_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4188 exec_resq_buf(struct tgsi_exec_machine *mach,
4189 const struct tgsi_full_instruction *inst)
4190 {
4191 uint32_t unit = fetch_sampler_unit(mach, inst, 0);
4192 uint32_t size;
4193 (void)mach->Buffer->lookup(mach->Buffer, unit, &size);
4194
4195 union tgsi_exec_channel r;
4196 for (int i = 0; i < TGSI_QUAD_SIZE; i++)
4197 r.i[i] = size;
4198
4199 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
4200 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4201 store_dest(mach, &r, &inst->Dst[0], inst, TGSI_CHAN_X);
4202 }
4203 }
4204 }
4205
4206 static void
exec_resq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4207 exec_resq(struct tgsi_exec_machine *mach,
4208 const struct tgsi_full_instruction *inst)
4209 {
4210 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4211 exec_resq_img(mach, inst);
4212 else
4213 exec_resq_buf(mach, inst);
4214 }
4215
4216 static void
micro_f2u64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4217 micro_f2u64(union tgsi_double_channel *dst,
4218 const union tgsi_exec_channel *src)
4219 {
4220 dst->u64[0] = (uint64_t)src->f[0];
4221 dst->u64[1] = (uint64_t)src->f[1];
4222 dst->u64[2] = (uint64_t)src->f[2];
4223 dst->u64[3] = (uint64_t)src->f[3];
4224 }
4225
4226 static void
micro_f2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4227 micro_f2i64(union tgsi_double_channel *dst,
4228 const union tgsi_exec_channel *src)
4229 {
4230 dst->i64[0] = (int64_t)src->f[0];
4231 dst->i64[1] = (int64_t)src->f[1];
4232 dst->i64[2] = (int64_t)src->f[2];
4233 dst->i64[3] = (int64_t)src->f[3];
4234 }
4235
4236 static void
micro_u2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4237 micro_u2i64(union tgsi_double_channel *dst,
4238 const union tgsi_exec_channel *src)
4239 {
4240 dst->u64[0] = (uint64_t)src->u[0];
4241 dst->u64[1] = (uint64_t)src->u[1];
4242 dst->u64[2] = (uint64_t)src->u[2];
4243 dst->u64[3] = (uint64_t)src->u[3];
4244 }
4245
4246 static void
micro_i2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4247 micro_i2i64(union tgsi_double_channel *dst,
4248 const union tgsi_exec_channel *src)
4249 {
4250 dst->i64[0] = (int64_t)src->i[0];
4251 dst->i64[1] = (int64_t)src->i[1];
4252 dst->i64[2] = (int64_t)src->i[2];
4253 dst->i64[3] = (int64_t)src->i[3];
4254 }
4255
4256 static void
micro_d2u64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4257 micro_d2u64(union tgsi_double_channel *dst,
4258 const union tgsi_double_channel *src)
4259 {
4260 dst->u64[0] = (uint64_t)src->d[0];
4261 dst->u64[1] = (uint64_t)src->d[1];
4262 dst->u64[2] = (uint64_t)src->d[2];
4263 dst->u64[3] = (uint64_t)src->d[3];
4264 }
4265
4266 static void
micro_d2i64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4267 micro_d2i64(union tgsi_double_channel *dst,
4268 const union tgsi_double_channel *src)
4269 {
4270 dst->i64[0] = (int64_t)src->d[0];
4271 dst->i64[1] = (int64_t)src->d[1];
4272 dst->i64[2] = (int64_t)src->d[2];
4273 dst->i64[3] = (int64_t)src->d[3];
4274 }
4275
4276 static void
micro_u642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4277 micro_u642d(union tgsi_double_channel *dst,
4278 const union tgsi_double_channel *src)
4279 {
4280 dst->d[0] = (double)src->u64[0];
4281 dst->d[1] = (double)src->u64[1];
4282 dst->d[2] = (double)src->u64[2];
4283 dst->d[3] = (double)src->u64[3];
4284 }
4285
4286 static void
micro_i642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4287 micro_i642d(union tgsi_double_channel *dst,
4288 const union tgsi_double_channel *src)
4289 {
4290 dst->d[0] = (double)src->i64[0];
4291 dst->d[1] = (double)src->i64[1];
4292 dst->d[2] = (double)src->i64[2];
4293 dst->d[3] = (double)src->i64[3];
4294 }
4295
4296 static void
micro_u642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4297 micro_u642f(union tgsi_exec_channel *dst,
4298 const union tgsi_double_channel *src)
4299 {
4300 dst->f[0] = (float)src->u64[0];
4301 dst->f[1] = (float)src->u64[1];
4302 dst->f[2] = (float)src->u64[2];
4303 dst->f[3] = (float)src->u64[3];
4304 }
4305
4306 static void
micro_i642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4307 micro_i642f(union tgsi_exec_channel *dst,
4308 const union tgsi_double_channel *src)
4309 {
4310 dst->f[0] = (float)src->i64[0];
4311 dst->f[1] = (float)src->i64[1];
4312 dst->f[2] = (float)src->i64[2];
4313 dst->f[3] = (float)src->i64[3];
4314 }
4315
4316 static void
exec_t_2_64(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_s op,enum tgsi_exec_datatype src_datatype)4317 exec_t_2_64(struct tgsi_exec_machine *mach,
4318 const struct tgsi_full_instruction *inst,
4319 micro_dop_s op,
4320 enum tgsi_exec_datatype src_datatype)
4321 {
4322 union tgsi_exec_channel src;
4323 union tgsi_double_channel dst;
4324
4325 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4326 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4327 op(&dst, &src);
4328 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4329 }
4330 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4331 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4332 op(&dst, &src);
4333 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4334 }
4335 }
4336
4337 static void
exec_64_2_t(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_sop_d op)4338 exec_64_2_t(struct tgsi_exec_machine *mach,
4339 const struct tgsi_full_instruction *inst,
4340 micro_sop_d op)
4341 {
4342 union tgsi_double_channel src;
4343 union tgsi_exec_channel dst;
4344 int wm = inst->Dst[0].Register.WriteMask;
4345 int i;
4346 int bit;
4347 for (i = 0; i < 2; i++) {
4348 bit = ffs(wm);
4349 if (bit) {
4350 wm &= ~(1 << (bit - 1));
4351 if (i == 0)
4352 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4353 else
4354 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4355 op(&dst, &src);
4356 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1);
4357 }
4358 }
4359 }
4360
4361 static void
micro_i2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4362 micro_i2f(union tgsi_exec_channel *dst,
4363 const union tgsi_exec_channel *src)
4364 {
4365 dst->f[0] = (float)src->i[0];
4366 dst->f[1] = (float)src->i[1];
4367 dst->f[2] = (float)src->i[2];
4368 dst->f[3] = (float)src->i[3];
4369 }
4370
4371 static void
micro_not(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4372 micro_not(union tgsi_exec_channel *dst,
4373 const union tgsi_exec_channel *src)
4374 {
4375 dst->u[0] = ~src->u[0];
4376 dst->u[1] = ~src->u[1];
4377 dst->u[2] = ~src->u[2];
4378 dst->u[3] = ~src->u[3];
4379 }
4380
4381 static void
micro_shl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4382 micro_shl(union tgsi_exec_channel *dst,
4383 const union tgsi_exec_channel *src0,
4384 const union tgsi_exec_channel *src1)
4385 {
4386 unsigned masked_count;
4387 masked_count = src1->u[0] & 0x1f;
4388 dst->u[0] = src0->u[0] << masked_count;
4389 masked_count = src1->u[1] & 0x1f;
4390 dst->u[1] = src0->u[1] << masked_count;
4391 masked_count = src1->u[2] & 0x1f;
4392 dst->u[2] = src0->u[2] << masked_count;
4393 masked_count = src1->u[3] & 0x1f;
4394 dst->u[3] = src0->u[3] << masked_count;
4395 }
4396
4397 static void
micro_and(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4398 micro_and(union tgsi_exec_channel *dst,
4399 const union tgsi_exec_channel *src0,
4400 const union tgsi_exec_channel *src1)
4401 {
4402 dst->u[0] = src0->u[0] & src1->u[0];
4403 dst->u[1] = src0->u[1] & src1->u[1];
4404 dst->u[2] = src0->u[2] & src1->u[2];
4405 dst->u[3] = src0->u[3] & src1->u[3];
4406 }
4407
4408 static void
micro_or(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4409 micro_or(union tgsi_exec_channel *dst,
4410 const union tgsi_exec_channel *src0,
4411 const union tgsi_exec_channel *src1)
4412 {
4413 dst->u[0] = src0->u[0] | src1->u[0];
4414 dst->u[1] = src0->u[1] | src1->u[1];
4415 dst->u[2] = src0->u[2] | src1->u[2];
4416 dst->u[3] = src0->u[3] | src1->u[3];
4417 }
4418
4419 static void
micro_xor(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4420 micro_xor(union tgsi_exec_channel *dst,
4421 const union tgsi_exec_channel *src0,
4422 const union tgsi_exec_channel *src1)
4423 {
4424 dst->u[0] = src0->u[0] ^ src1->u[0];
4425 dst->u[1] = src0->u[1] ^ src1->u[1];
4426 dst->u[2] = src0->u[2] ^ src1->u[2];
4427 dst->u[3] = src0->u[3] ^ src1->u[3];
4428 }
4429
4430 static void
micro_mod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4431 micro_mod(union tgsi_exec_channel *dst,
4432 const union tgsi_exec_channel *src0,
4433 const union tgsi_exec_channel *src1)
4434 {
4435 dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
4436 dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
4437 dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
4438 dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
4439 }
4440
4441 static void
micro_f2i(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4442 micro_f2i(union tgsi_exec_channel *dst,
4443 const union tgsi_exec_channel *src)
4444 {
4445 dst->i[0] = (int)src->f[0];
4446 dst->i[1] = (int)src->f[1];
4447 dst->i[2] = (int)src->f[2];
4448 dst->i[3] = (int)src->f[3];
4449 }
4450
4451 static void
micro_fseq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4452 micro_fseq(union tgsi_exec_channel *dst,
4453 const union tgsi_exec_channel *src0,
4454 const union tgsi_exec_channel *src1)
4455 {
4456 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4457 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4458 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4459 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4460 }
4461
4462 static void
micro_fsge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4463 micro_fsge(union tgsi_exec_channel *dst,
4464 const union tgsi_exec_channel *src0,
4465 const union tgsi_exec_channel *src1)
4466 {
4467 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4468 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4469 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4470 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4471 }
4472
4473 static void
micro_fslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4474 micro_fslt(union tgsi_exec_channel *dst,
4475 const union tgsi_exec_channel *src0,
4476 const union tgsi_exec_channel *src1)
4477 {
4478 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4479 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4480 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4481 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4482 }
4483
4484 static void
micro_fsne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4485 micro_fsne(union tgsi_exec_channel *dst,
4486 const union tgsi_exec_channel *src0,
4487 const union tgsi_exec_channel *src1)
4488 {
4489 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4490 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4491 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4492 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4493 }
4494
4495 static void
micro_idiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4496 micro_idiv(union tgsi_exec_channel *dst,
4497 const union tgsi_exec_channel *src0,
4498 const union tgsi_exec_channel *src1)
4499 {
4500 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4501 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4502 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4503 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4504 }
4505
4506 static void
micro_imax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4507 micro_imax(union tgsi_exec_channel *dst,
4508 const union tgsi_exec_channel *src0,
4509 const union tgsi_exec_channel *src1)
4510 {
4511 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4512 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4513 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4514 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4515 }
4516
4517 static void
micro_imin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4518 micro_imin(union tgsi_exec_channel *dst,
4519 const union tgsi_exec_channel *src0,
4520 const union tgsi_exec_channel *src1)
4521 {
4522 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4523 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4524 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4525 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4526 }
4527
4528 static void
micro_isge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4529 micro_isge(union tgsi_exec_channel *dst,
4530 const union tgsi_exec_channel *src0,
4531 const union tgsi_exec_channel *src1)
4532 {
4533 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4534 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4535 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4536 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4537 }
4538
4539 static void
micro_ishr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4540 micro_ishr(union tgsi_exec_channel *dst,
4541 const union tgsi_exec_channel *src0,
4542 const union tgsi_exec_channel *src1)
4543 {
4544 unsigned masked_count;
4545 masked_count = src1->i[0] & 0x1f;
4546 dst->i[0] = src0->i[0] >> masked_count;
4547 masked_count = src1->i[1] & 0x1f;
4548 dst->i[1] = src0->i[1] >> masked_count;
4549 masked_count = src1->i[2] & 0x1f;
4550 dst->i[2] = src0->i[2] >> masked_count;
4551 masked_count = src1->i[3] & 0x1f;
4552 dst->i[3] = src0->i[3] >> masked_count;
4553 }
4554
4555 static void
micro_islt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4556 micro_islt(union tgsi_exec_channel *dst,
4557 const union tgsi_exec_channel *src0,
4558 const union tgsi_exec_channel *src1)
4559 {
4560 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4561 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4562 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4563 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4564 }
4565
4566 static void
micro_f2u(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4567 micro_f2u(union tgsi_exec_channel *dst,
4568 const union tgsi_exec_channel *src)
4569 {
4570 dst->u[0] = (uint)src->f[0];
4571 dst->u[1] = (uint)src->f[1];
4572 dst->u[2] = (uint)src->f[2];
4573 dst->u[3] = (uint)src->f[3];
4574 }
4575
4576 static void
micro_u2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4577 micro_u2f(union tgsi_exec_channel *dst,
4578 const union tgsi_exec_channel *src)
4579 {
4580 dst->f[0] = (float)src->u[0];
4581 dst->f[1] = (float)src->u[1];
4582 dst->f[2] = (float)src->u[2];
4583 dst->f[3] = (float)src->u[3];
4584 }
4585
4586 static void
micro_uadd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4587 micro_uadd(union tgsi_exec_channel *dst,
4588 const union tgsi_exec_channel *src0,
4589 const union tgsi_exec_channel *src1)
4590 {
4591 dst->u[0] = src0->u[0] + src1->u[0];
4592 dst->u[1] = src0->u[1] + src1->u[1];
4593 dst->u[2] = src0->u[2] + src1->u[2];
4594 dst->u[3] = src0->u[3] + src1->u[3];
4595 }
4596
4597 static void
micro_udiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4598 micro_udiv(union tgsi_exec_channel *dst,
4599 const union tgsi_exec_channel *src0,
4600 const union tgsi_exec_channel *src1)
4601 {
4602 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4603 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4604 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4605 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4606 }
4607
4608 static void
micro_umad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4609 micro_umad(union tgsi_exec_channel *dst,
4610 const union tgsi_exec_channel *src0,
4611 const union tgsi_exec_channel *src1,
4612 const union tgsi_exec_channel *src2)
4613 {
4614 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4615 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4616 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4617 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4618 }
4619
4620 static void
micro_umax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4621 micro_umax(union tgsi_exec_channel *dst,
4622 const union tgsi_exec_channel *src0,
4623 const union tgsi_exec_channel *src1)
4624 {
4625 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4626 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4627 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4628 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4629 }
4630
4631 static void
micro_umin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4632 micro_umin(union tgsi_exec_channel *dst,
4633 const union tgsi_exec_channel *src0,
4634 const union tgsi_exec_channel *src1)
4635 {
4636 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4637 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4638 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4639 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4640 }
4641
4642 static void
micro_umod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4643 micro_umod(union tgsi_exec_channel *dst,
4644 const union tgsi_exec_channel *src0,
4645 const union tgsi_exec_channel *src1)
4646 {
4647 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4648 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4649 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4650 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4651 }
4652
4653 static void
micro_umul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4654 micro_umul(union tgsi_exec_channel *dst,
4655 const union tgsi_exec_channel *src0,
4656 const union tgsi_exec_channel *src1)
4657 {
4658 dst->u[0] = src0->u[0] * src1->u[0];
4659 dst->u[1] = src0->u[1] * src1->u[1];
4660 dst->u[2] = src0->u[2] * src1->u[2];
4661 dst->u[3] = src0->u[3] * src1->u[3];
4662 }
4663
4664 static void
micro_imul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4665 micro_imul_hi(union tgsi_exec_channel *dst,
4666 const union tgsi_exec_channel *src0,
4667 const union tgsi_exec_channel *src1)
4668 {
4669 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4670 dst->i[0] = I64M(src0->i[0], src1->i[0]);
4671 dst->i[1] = I64M(src0->i[1], src1->i[1]);
4672 dst->i[2] = I64M(src0->i[2], src1->i[2]);
4673 dst->i[3] = I64M(src0->i[3], src1->i[3]);
4674 #undef I64M
4675 }
4676
4677 static void
micro_umul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4678 micro_umul_hi(union tgsi_exec_channel *dst,
4679 const union tgsi_exec_channel *src0,
4680 const union tgsi_exec_channel *src1)
4681 {
4682 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4683 dst->u[0] = U64M(src0->u[0], src1->u[0]);
4684 dst->u[1] = U64M(src0->u[1], src1->u[1]);
4685 dst->u[2] = U64M(src0->u[2], src1->u[2]);
4686 dst->u[3] = U64M(src0->u[3], src1->u[3]);
4687 #undef U64M
4688 }
4689
4690 static void
micro_useq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4691 micro_useq(union tgsi_exec_channel *dst,
4692 const union tgsi_exec_channel *src0,
4693 const union tgsi_exec_channel *src1)
4694 {
4695 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4696 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4697 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4698 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4699 }
4700
4701 static void
micro_usge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4702 micro_usge(union tgsi_exec_channel *dst,
4703 const union tgsi_exec_channel *src0,
4704 const union tgsi_exec_channel *src1)
4705 {
4706 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4707 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4708 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4709 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4710 }
4711
4712 static void
micro_ushr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4713 micro_ushr(union tgsi_exec_channel *dst,
4714 const union tgsi_exec_channel *src0,
4715 const union tgsi_exec_channel *src1)
4716 {
4717 unsigned masked_count;
4718 masked_count = src1->u[0] & 0x1f;
4719 dst->u[0] = src0->u[0] >> masked_count;
4720 masked_count = src1->u[1] & 0x1f;
4721 dst->u[1] = src0->u[1] >> masked_count;
4722 masked_count = src1->u[2] & 0x1f;
4723 dst->u[2] = src0->u[2] >> masked_count;
4724 masked_count = src1->u[3] & 0x1f;
4725 dst->u[3] = src0->u[3] >> masked_count;
4726 }
4727
4728 static void
micro_uslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4729 micro_uslt(union tgsi_exec_channel *dst,
4730 const union tgsi_exec_channel *src0,
4731 const union tgsi_exec_channel *src1)
4732 {
4733 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4734 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4735 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4736 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4737 }
4738
4739 static void
micro_usne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4740 micro_usne(union tgsi_exec_channel *dst,
4741 const union tgsi_exec_channel *src0,
4742 const union tgsi_exec_channel *src1)
4743 {
4744 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4745 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4746 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4747 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
4748 }
4749
4750 static void
micro_uarl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4751 micro_uarl(union tgsi_exec_channel *dst,
4752 const union tgsi_exec_channel *src)
4753 {
4754 dst->i[0] = src->u[0];
4755 dst->i[1] = src->u[1];
4756 dst->i[2] = src->u[2];
4757 dst->i[3] = src->u[3];
4758 }
4759
4760 /**
4761 * Signed bitfield extract (i.e. sign-extend the extracted bits)
4762 */
4763 static void
micro_ibfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4764 micro_ibfe(union tgsi_exec_channel *dst,
4765 const union tgsi_exec_channel *src0,
4766 const union tgsi_exec_channel *src1,
4767 const union tgsi_exec_channel *src2)
4768 {
4769 int i;
4770 for (i = 0; i < 4; i++) {
4771 int width = src2->i[i];
4772 int offset = src1->i[i] & 0x1f;
4773 if (width == 32 && offset == 0) {
4774 dst->i[i] = src0->i[i];
4775 continue;
4776 }
4777 width &= 0x1f;
4778 if (width == 0)
4779 dst->i[i] = 0;
4780 else if (width + offset < 32)
4781 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
4782 else
4783 dst->i[i] = src0->i[i] >> offset;
4784 }
4785 }
4786
4787 /**
4788 * Unsigned bitfield extract
4789 */
4790 static void
micro_ubfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4791 micro_ubfe(union tgsi_exec_channel *dst,
4792 const union tgsi_exec_channel *src0,
4793 const union tgsi_exec_channel *src1,
4794 const union tgsi_exec_channel *src2)
4795 {
4796 int i;
4797 for (i = 0; i < 4; i++) {
4798 int width = src2->u[i];
4799 int offset = src1->u[i] & 0x1f;
4800 if (width == 32 && offset == 0) {
4801 dst->u[i] = src0->u[i];
4802 continue;
4803 }
4804 width &= 0x1f;
4805 if (width == 0)
4806 dst->u[i] = 0;
4807 else if (width + offset < 32)
4808 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
4809 else
4810 dst->u[i] = src0->u[i] >> offset;
4811 }
4812 }
4813
4814 /**
4815 * Bitfield insert: copy low bits from src1 into a region of src0.
4816 */
4817 static void
micro_bfi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)4818 micro_bfi(union tgsi_exec_channel *dst,
4819 const union tgsi_exec_channel *src0,
4820 const union tgsi_exec_channel *src1,
4821 const union tgsi_exec_channel *src2,
4822 const union tgsi_exec_channel *src3)
4823 {
4824 int i;
4825 for (i = 0; i < 4; i++) {
4826 int width = src3->u[i];
4827 int offset = src2->u[i] & 0x1f;
4828 if (width == 32) {
4829 dst->u[i] = src1->u[i];
4830 } else {
4831 int bitmask = ((1 << width) - 1) << offset;
4832 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
4833 }
4834 }
4835 }
4836
4837 static void
micro_brev(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4838 micro_brev(union tgsi_exec_channel *dst,
4839 const union tgsi_exec_channel *src)
4840 {
4841 dst->u[0] = util_bitreverse(src->u[0]);
4842 dst->u[1] = util_bitreverse(src->u[1]);
4843 dst->u[2] = util_bitreverse(src->u[2]);
4844 dst->u[3] = util_bitreverse(src->u[3]);
4845 }
4846
4847 static void
micro_popc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4848 micro_popc(union tgsi_exec_channel *dst,
4849 const union tgsi_exec_channel *src)
4850 {
4851 dst->u[0] = util_bitcount(src->u[0]);
4852 dst->u[1] = util_bitcount(src->u[1]);
4853 dst->u[2] = util_bitcount(src->u[2]);
4854 dst->u[3] = util_bitcount(src->u[3]);
4855 }
4856
4857 static void
micro_lsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4858 micro_lsb(union tgsi_exec_channel *dst,
4859 const union tgsi_exec_channel *src)
4860 {
4861 dst->i[0] = ffs(src->u[0]) - 1;
4862 dst->i[1] = ffs(src->u[1]) - 1;
4863 dst->i[2] = ffs(src->u[2]) - 1;
4864 dst->i[3] = ffs(src->u[3]) - 1;
4865 }
4866
4867 static void
micro_imsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4868 micro_imsb(union tgsi_exec_channel *dst,
4869 const union tgsi_exec_channel *src)
4870 {
4871 dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
4872 dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
4873 dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
4874 dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
4875 }
4876
4877 static void
micro_umsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4878 micro_umsb(union tgsi_exec_channel *dst,
4879 const union tgsi_exec_channel *src)
4880 {
4881 dst->i[0] = util_last_bit(src->u[0]) - 1;
4882 dst->i[1] = util_last_bit(src->u[1]) - 1;
4883 dst->i[2] = util_last_bit(src->u[2]) - 1;
4884 dst->i[3] = util_last_bit(src->u[3]) - 1;
4885 }
4886
4887
4888 static void
exec_interp_at_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4889 exec_interp_at_sample(struct tgsi_exec_machine *mach,
4890 const struct tgsi_full_instruction *inst)
4891 {
4892 union tgsi_exec_channel index;
4893 union tgsi_exec_channel index2D;
4894 union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
4895 const struct tgsi_full_src_register *reg = &inst->Src[0];
4896
4897 assert(reg->Register.File == TGSI_FILE_INPUT);
4898 assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);
4899
4900 get_index_registers(mach, reg, &index, &index2D);
4901 float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];
4902
4903 /* Short cut: sample 0 is like a normal fetch */
4904 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4905 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4906 continue;
4907
4908 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
4909 &result[chan]);
4910 if (sample != 0.0f) {
4911
4912 /* TODO: define the samples > 0, but so far we only do fake MSAA */
4913 float x = 0;
4914 float y = 0;
4915
4916 unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];
4917 assert(pos >= 0);
4918 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
4919 mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);
4920 }
4921 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);
4922 }
4923 }
4924
4925
4926 static void
exec_interp_at_offset(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4927 exec_interp_at_offset(struct tgsi_exec_machine *mach,
4928 const struct tgsi_full_instruction *inst)
4929 {
4930 union tgsi_exec_channel index;
4931 union tgsi_exec_channel index2D;
4932 union tgsi_exec_channel ofsx;
4933 union tgsi_exec_channel ofsy;
4934 const struct tgsi_full_src_register *reg = &inst->Src[0];
4935
4936 assert(reg->Register.File == TGSI_FILE_INPUT);
4937
4938 get_index_registers(mach, reg, &index, &index2D);
4939 unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];
4940
4941 fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
4942 fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
4943
4944 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4945 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4946 continue;
4947 union tgsi_exec_channel result;
4948 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result);
4949 mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result);
4950 store_dest(mach, &result, &inst->Dst[0], inst, chan);
4951 }
4952 }
4953
4954
4955 static void
exec_interp_at_centroid(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4956 exec_interp_at_centroid(struct tgsi_exec_machine *mach,
4957 const struct tgsi_full_instruction *inst)
4958 {
4959 union tgsi_exec_channel index;
4960 union tgsi_exec_channel index2D;
4961 union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
4962 const struct tgsi_full_src_register *reg = &inst->Src[0];
4963
4964 assert(reg->Register.File == TGSI_FILE_INPUT);
4965 get_index_registers(mach, reg, &index, &index2D);
4966
4967 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4968 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4969 continue;
4970
4971 /* Here we should add the change to use a sample that lies within the
4972 * primitive (Section 15.2):
4973 *
4974 * "When interpolating variables declared using centroid in ,
4975 * the variable is sampled at a location within the pixel covered
4976 * by the primitive generating the fragment.
4977 * ...
4978 * The built-in functions interpolateAtCentroid ... will sample
4979 * variables as though they were declared with the centroid ...
4980 * qualifier[s]."
4981 *
4982 * Since we only support 1 sample currently, this is just a pass-through.
4983 */
4984 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
4985 &result[chan]);
4986 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);
4987 }
4988
4989 }
4990
4991
4992 /**
4993 * Execute a TGSI instruction.
4994 * Returns TRUE if a barrier instruction is hit,
4995 * otherwise FALSE.
4996 */
4997 static boolean
exec_instruction(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int * pc)4998 exec_instruction(
4999 struct tgsi_exec_machine *mach,
5000 const struct tgsi_full_instruction *inst,
5001 int *pc )
5002 {
5003 union tgsi_exec_channel r[10];
5004
5005 (*pc)++;
5006
5007 switch (inst->Instruction.Opcode) {
5008 case TGSI_OPCODE_ARL:
5009 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_FLOAT);
5010 break;
5011
5012 case TGSI_OPCODE_MOV:
5013 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_FLOAT);
5014 break;
5015
5016 case TGSI_OPCODE_LIT:
5017 exec_lit(mach, inst);
5018 break;
5019
5020 case TGSI_OPCODE_RCP:
5021 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT);
5022 break;
5023
5024 case TGSI_OPCODE_RSQ:
5025 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT);
5026 break;
5027
5028 case TGSI_OPCODE_EXP:
5029 exec_exp(mach, inst);
5030 break;
5031
5032 case TGSI_OPCODE_LOG:
5033 exec_log(mach, inst);
5034 break;
5035
5036 case TGSI_OPCODE_MUL:
5037 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT);
5038 break;
5039
5040 case TGSI_OPCODE_ADD:
5041 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT);
5042 break;
5043
5044 case TGSI_OPCODE_DP3:
5045 exec_dp3(mach, inst);
5046 break;
5047
5048 case TGSI_OPCODE_DP4:
5049 exec_dp4(mach, inst);
5050 break;
5051
5052 case TGSI_OPCODE_DST:
5053 exec_dst(mach, inst);
5054 break;
5055
5056 case TGSI_OPCODE_MIN:
5057 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT);
5058 break;
5059
5060 case TGSI_OPCODE_MAX:
5061 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT);
5062 break;
5063
5064 case TGSI_OPCODE_SLT:
5065 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT);
5066 break;
5067
5068 case TGSI_OPCODE_SGE:
5069 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT);
5070 break;
5071
5072 case TGSI_OPCODE_MAD:
5073 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT);
5074 break;
5075
5076 case TGSI_OPCODE_LRP:
5077 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT);
5078 break;
5079
5080 case TGSI_OPCODE_SQRT:
5081 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT);
5082 break;
5083
5084 case TGSI_OPCODE_FRC:
5085 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT);
5086 break;
5087
5088 case TGSI_OPCODE_FLR:
5089 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT);
5090 break;
5091
5092 case TGSI_OPCODE_ROUND:
5093 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT);
5094 break;
5095
5096 case TGSI_OPCODE_EX2:
5097 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT);
5098 break;
5099
5100 case TGSI_OPCODE_LG2:
5101 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT);
5102 break;
5103
5104 case TGSI_OPCODE_POW:
5105 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT);
5106 break;
5107
5108 case TGSI_OPCODE_LDEXP:
5109 exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT);
5110 break;
5111
5112 case TGSI_OPCODE_COS:
5113 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT);
5114 break;
5115
5116 case TGSI_OPCODE_DDX_FINE:
5117 exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT);
5118 break;
5119
5120 case TGSI_OPCODE_DDX:
5121 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT);
5122 break;
5123
5124 case TGSI_OPCODE_DDY_FINE:
5125 exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT);
5126 break;
5127
5128 case TGSI_OPCODE_DDY:
5129 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT);
5130 break;
5131
5132 case TGSI_OPCODE_KILL:
5133 exec_kill (mach);
5134 break;
5135
5136 case TGSI_OPCODE_KILL_IF:
5137 exec_kill_if (mach, inst);
5138 break;
5139
5140 case TGSI_OPCODE_PK2H:
5141 exec_pk2h(mach, inst);
5142 break;
5143
5144 case TGSI_OPCODE_PK2US:
5145 assert (0);
5146 break;
5147
5148 case TGSI_OPCODE_PK4B:
5149 assert (0);
5150 break;
5151
5152 case TGSI_OPCODE_PK4UB:
5153 assert (0);
5154 break;
5155
5156 case TGSI_OPCODE_SEQ:
5157 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT);
5158 break;
5159
5160 case TGSI_OPCODE_SGT:
5161 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT);
5162 break;
5163
5164 case TGSI_OPCODE_SIN:
5165 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT);
5166 break;
5167
5168 case TGSI_OPCODE_SLE:
5169 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT);
5170 break;
5171
5172 case TGSI_OPCODE_SNE:
5173 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT);
5174 break;
5175
5176 case TGSI_OPCODE_TEX:
5177 /* simple texture lookup */
5178 /* src[0] = texcoord */
5179 /* src[1] = sampler unit */
5180 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5181 break;
5182
5183 case TGSI_OPCODE_TXB:
5184 /* Texture lookup with lod bias */
5185 /* src[0] = texcoord (src[0].w = LOD bias) */
5186 /* src[1] = sampler unit */
5187 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5188 break;
5189
5190 case TGSI_OPCODE_TXD:
5191 /* Texture lookup with explict partial derivatives */
5192 /* src[0] = texcoord */
5193 /* src[1] = d[strq]/dx */
5194 /* src[2] = d[strq]/dy */
5195 /* src[3] = sampler unit */
5196 exec_txd(mach, inst);
5197 break;
5198
5199 case TGSI_OPCODE_TXL:
5200 /* Texture lookup with explit LOD */
5201 /* src[0] = texcoord (src[0].w = LOD) */
5202 /* src[1] = sampler unit */
5203 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5204 break;
5205
5206 case TGSI_OPCODE_TXP:
5207 /* Texture lookup with projection */
5208 /* src[0] = texcoord (src[0].w = projection) */
5209 /* src[1] = sampler unit */
5210 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5211 break;
5212
5213 case TGSI_OPCODE_TG4:
5214 /* src[0] = texcoord */
5215 /* src[1] = component */
5216 /* src[2] = sampler unit */
5217 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5218 break;
5219
5220 case TGSI_OPCODE_LODQ:
5221 /* src[0] = texcoord */
5222 /* src[1] = sampler unit */
5223 exec_lodq(mach, inst);
5224 break;
5225
5226 case TGSI_OPCODE_UP2H:
5227 exec_up2h(mach, inst);
5228 break;
5229
5230 case TGSI_OPCODE_UP2US:
5231 assert (0);
5232 break;
5233
5234 case TGSI_OPCODE_UP4B:
5235 assert (0);
5236 break;
5237
5238 case TGSI_OPCODE_UP4UB:
5239 assert (0);
5240 break;
5241
5242 case TGSI_OPCODE_ARR:
5243 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_FLOAT);
5244 break;
5245
5246 case TGSI_OPCODE_CAL:
5247 /* skip the call if no execution channels are enabled */
5248 if (mach->ExecMask) {
5249 /* do the call */
5250
5251 /* First, record the depths of the execution stacks.
5252 * This is important for deeply nested/looped return statements.
5253 * We have to unwind the stacks by the correct amount. For a
5254 * real code generator, we could determine the number of entries
5255 * to pop off each stack with simple static analysis and avoid
5256 * implementing this data structure at run time.
5257 */
5258 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5259 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5260 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5261 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5262 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5263 /* note that PC was already incremented above */
5264 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5265
5266 mach->CallStackTop++;
5267
5268 /* Second, push the Cond, Loop, Cont, Func stacks */
5269 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5270 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5271 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5272 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5273 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5274 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5275
5276 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5277 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5278 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5279 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5280 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5281 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5282
5283 /* Finally, jump to the subroutine. The label is a pointer
5284 * (an instruction number) to the BGNSUB instruction.
5285 */
5286 *pc = inst->Label.Label;
5287 assert(mach->Instructions[*pc].Instruction.Opcode
5288 == TGSI_OPCODE_BGNSUB);
5289 }
5290 break;
5291
5292 case TGSI_OPCODE_RET:
5293 mach->FuncMask &= ~mach->ExecMask;
5294 UPDATE_EXEC_MASK(mach);
5295
5296 if (mach->FuncMask == 0x0) {
5297 /* really return now (otherwise, keep executing */
5298
5299 if (mach->CallStackTop == 0) {
5300 /* returning from main() */
5301 mach->CondStackTop = 0;
5302 mach->LoopStackTop = 0;
5303 mach->ContStackTop = 0;
5304 mach->LoopLabelStackTop = 0;
5305 mach->SwitchStackTop = 0;
5306 mach->BreakStackTop = 0;
5307 *pc = -1;
5308 return FALSE;
5309 }
5310
5311 assert(mach->CallStackTop > 0);
5312 mach->CallStackTop--;
5313
5314 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5315 mach->CondMask = mach->CondStack[mach->CondStackTop];
5316
5317 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5318 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5319
5320 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5321 mach->ContMask = mach->ContStack[mach->ContStackTop];
5322
5323 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5324 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5325
5326 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5327 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5328
5329 assert(mach->FuncStackTop > 0);
5330 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5331
5332 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5333
5334 UPDATE_EXEC_MASK(mach);
5335 }
5336 break;
5337
5338 case TGSI_OPCODE_SSG:
5339 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT);
5340 break;
5341
5342 case TGSI_OPCODE_CMP:
5343 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT);
5344 break;
5345
5346 case TGSI_OPCODE_DIV:
5347 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT);
5348 break;
5349
5350 case TGSI_OPCODE_DP2:
5351 exec_dp2(mach, inst);
5352 break;
5353
5354 case TGSI_OPCODE_IF:
5355 /* push CondMask */
5356 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5357 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5358 FETCH( &r[0], 0, TGSI_CHAN_X );
5359 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
5360 if (!r[0].f[i])
5361 mach->CondMask &= ~(1 << i);
5362 }
5363 UPDATE_EXEC_MASK(mach);
5364 /* If no channels are taking the then branch, jump to ELSE. */
5365 if (!mach->CondMask)
5366 *pc = inst->Label.Label;
5367 break;
5368
5369 case TGSI_OPCODE_UIF:
5370 /* push CondMask */
5371 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5372 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5373 IFETCH( &r[0], 0, TGSI_CHAN_X );
5374 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
5375 if (!r[0].u[i])
5376 mach->CondMask &= ~(1 << i);
5377 }
5378 UPDATE_EXEC_MASK(mach);
5379 /* If no channels are taking the then branch, jump to ELSE. */
5380 if (!mach->CondMask)
5381 *pc = inst->Label.Label;
5382 break;
5383
5384 case TGSI_OPCODE_ELSE:
5385 /* invert CondMask wrt previous mask */
5386 {
5387 uint prevMask;
5388 assert(mach->CondStackTop > 0);
5389 prevMask = mach->CondStack[mach->CondStackTop - 1];
5390 mach->CondMask = ~mach->CondMask & prevMask;
5391 UPDATE_EXEC_MASK(mach);
5392
5393 /* If no channels are taking ELSE, jump to ENDIF */
5394 if (!mach->CondMask)
5395 *pc = inst->Label.Label;
5396 }
5397 break;
5398
5399 case TGSI_OPCODE_ENDIF:
5400 /* pop CondMask */
5401 assert(mach->CondStackTop > 0);
5402 mach->CondMask = mach->CondStack[--mach->CondStackTop];
5403 UPDATE_EXEC_MASK(mach);
5404 break;
5405
5406 case TGSI_OPCODE_END:
5407 /* make sure we end primitives which haven't
5408 * been explicitly emitted */
5409 conditional_emit_primitive(mach);
5410 /* halt execution */
5411 *pc = -1;
5412 break;
5413
5414 case TGSI_OPCODE_CEIL:
5415 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT);
5416 break;
5417
5418 case TGSI_OPCODE_I2F:
5419 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_INT);
5420 break;
5421
5422 case TGSI_OPCODE_NOT:
5423 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT);
5424 break;
5425
5426 case TGSI_OPCODE_TRUNC:
5427 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT);
5428 break;
5429
5430 case TGSI_OPCODE_SHL:
5431 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT);
5432 break;
5433
5434 case TGSI_OPCODE_AND:
5435 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT);
5436 break;
5437
5438 case TGSI_OPCODE_OR:
5439 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT);
5440 break;
5441
5442 case TGSI_OPCODE_MOD:
5443 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT);
5444 break;
5445
5446 case TGSI_OPCODE_XOR:
5447 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT);
5448 break;
5449
5450 case TGSI_OPCODE_TXF:
5451 exec_txf(mach, inst);
5452 break;
5453
5454 case TGSI_OPCODE_TXQ:
5455 exec_txq(mach, inst);
5456 break;
5457
5458 case TGSI_OPCODE_EMIT:
5459 emit_vertex(mach, inst);
5460 break;
5461
5462 case TGSI_OPCODE_ENDPRIM:
5463 emit_primitive(mach, inst);
5464 break;
5465
5466 case TGSI_OPCODE_BGNLOOP:
5467 /* push LoopMask and ContMasks */
5468 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5469 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5470 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5471 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5472
5473 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5474 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5475 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5476 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5477 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5478 break;
5479
5480 case TGSI_OPCODE_ENDLOOP:
5481 /* Restore ContMask, but don't pop */
5482 assert(mach->ContStackTop > 0);
5483 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5484 UPDATE_EXEC_MASK(mach);
5485 if (mach->ExecMask) {
5486 /* repeat loop: jump to instruction just past BGNLOOP */
5487 assert(mach->LoopLabelStackTop > 0);
5488 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5489 }
5490 else {
5491 /* exit loop: pop LoopMask */
5492 assert(mach->LoopStackTop > 0);
5493 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5494 /* pop ContMask */
5495 assert(mach->ContStackTop > 0);
5496 mach->ContMask = mach->ContStack[--mach->ContStackTop];
5497 assert(mach->LoopLabelStackTop > 0);
5498 --mach->LoopLabelStackTop;
5499
5500 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5501 }
5502 UPDATE_EXEC_MASK(mach);
5503 break;
5504
5505 case TGSI_OPCODE_BRK:
5506 exec_break(mach);
5507 break;
5508
5509 case TGSI_OPCODE_CONT:
5510 /* turn off cont channels for each enabled exec channel */
5511 mach->ContMask &= ~mach->ExecMask;
5512 /* Todo: if mach->LoopMask == 0, jump to end of loop */
5513 UPDATE_EXEC_MASK(mach);
5514 break;
5515
5516 case TGSI_OPCODE_BGNSUB:
5517 /* no-op */
5518 break;
5519
5520 case TGSI_OPCODE_ENDSUB:
5521 /*
5522 * XXX: This really should be a no-op. We should never reach this opcode.
5523 */
5524
5525 assert(mach->CallStackTop > 0);
5526 mach->CallStackTop--;
5527
5528 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5529 mach->CondMask = mach->CondStack[mach->CondStackTop];
5530
5531 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5532 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5533
5534 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5535 mach->ContMask = mach->ContStack[mach->ContStackTop];
5536
5537 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5538 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5539
5540 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5541 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5542
5543 assert(mach->FuncStackTop > 0);
5544 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5545
5546 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5547
5548 UPDATE_EXEC_MASK(mach);
5549 break;
5550
5551 case TGSI_OPCODE_NOP:
5552 break;
5553
5554 case TGSI_OPCODE_F2I:
5555 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_FLOAT);
5556 break;
5557
5558 case TGSI_OPCODE_FSEQ:
5559 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_FLOAT);
5560 break;
5561
5562 case TGSI_OPCODE_FSGE:
5563 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_FLOAT);
5564 break;
5565
5566 case TGSI_OPCODE_FSLT:
5567 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_FLOAT);
5568 break;
5569
5570 case TGSI_OPCODE_FSNE:
5571 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_FLOAT);
5572 break;
5573
5574 case TGSI_OPCODE_IDIV:
5575 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT);
5576 break;
5577
5578 case TGSI_OPCODE_IMAX:
5579 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT);
5580 break;
5581
5582 case TGSI_OPCODE_IMIN:
5583 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT);
5584 break;
5585
5586 case TGSI_OPCODE_INEG:
5587 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT);
5588 break;
5589
5590 case TGSI_OPCODE_ISGE:
5591 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT);
5592 break;
5593
5594 case TGSI_OPCODE_ISHR:
5595 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT);
5596 break;
5597
5598 case TGSI_OPCODE_ISLT:
5599 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT);
5600 break;
5601
5602 case TGSI_OPCODE_F2U:
5603 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_FLOAT);
5604 break;
5605
5606 case TGSI_OPCODE_U2F:
5607 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_UINT);
5608 break;
5609
5610 case TGSI_OPCODE_UADD:
5611 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT);
5612 break;
5613
5614 case TGSI_OPCODE_UDIV:
5615 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT);
5616 break;
5617
5618 case TGSI_OPCODE_UMAD:
5619 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT);
5620 break;
5621
5622 case TGSI_OPCODE_UMAX:
5623 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT);
5624 break;
5625
5626 case TGSI_OPCODE_UMIN:
5627 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT);
5628 break;
5629
5630 case TGSI_OPCODE_UMOD:
5631 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT);
5632 break;
5633
5634 case TGSI_OPCODE_UMUL:
5635 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT);
5636 break;
5637
5638 case TGSI_OPCODE_IMUL_HI:
5639 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT);
5640 break;
5641
5642 case TGSI_OPCODE_UMUL_HI:
5643 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT);
5644 break;
5645
5646 case TGSI_OPCODE_USEQ:
5647 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT);
5648 break;
5649
5650 case TGSI_OPCODE_USGE:
5651 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT);
5652 break;
5653
5654 case TGSI_OPCODE_USHR:
5655 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT);
5656 break;
5657
5658 case TGSI_OPCODE_USLT:
5659 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT);
5660 break;
5661
5662 case TGSI_OPCODE_USNE:
5663 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT);
5664 break;
5665
5666 case TGSI_OPCODE_SWITCH:
5667 exec_switch(mach, inst);
5668 break;
5669
5670 case TGSI_OPCODE_CASE:
5671 exec_case(mach, inst);
5672 break;
5673
5674 case TGSI_OPCODE_DEFAULT:
5675 exec_default(mach);
5676 break;
5677
5678 case TGSI_OPCODE_ENDSWITCH:
5679 exec_endswitch(mach);
5680 break;
5681
5682 case TGSI_OPCODE_SAMPLE_I:
5683 exec_txf(mach, inst);
5684 break;
5685
5686 case TGSI_OPCODE_SAMPLE_I_MS:
5687 exec_txf(mach, inst);
5688 break;
5689
5690 case TGSI_OPCODE_SAMPLE:
5691 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE);
5692 break;
5693
5694 case TGSI_OPCODE_SAMPLE_B:
5695 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE);
5696 break;
5697
5698 case TGSI_OPCODE_SAMPLE_C:
5699 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE);
5700 break;
5701
5702 case TGSI_OPCODE_SAMPLE_C_LZ:
5703 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE);
5704 break;
5705
5706 case TGSI_OPCODE_SAMPLE_D:
5707 exec_sample_d(mach, inst);
5708 break;
5709
5710 case TGSI_OPCODE_SAMPLE_L:
5711 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE);
5712 break;
5713
5714 case TGSI_OPCODE_GATHER4:
5715 exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE);
5716 break;
5717
5718 case TGSI_OPCODE_SVIEWINFO:
5719 exec_txq(mach, inst);
5720 break;
5721
5722 case TGSI_OPCODE_SAMPLE_POS:
5723 assert(0);
5724 break;
5725
5726 case TGSI_OPCODE_SAMPLE_INFO:
5727 assert(0);
5728 break;
5729
5730 case TGSI_OPCODE_LOD:
5731 exec_lodq(mach, inst);
5732 break;
5733
5734 case TGSI_OPCODE_UARL:
5735 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_UINT);
5736 break;
5737
5738 case TGSI_OPCODE_UCMP:
5739 exec_ucmp(mach, inst);
5740 break;
5741
5742 case TGSI_OPCODE_IABS:
5743 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT);
5744 break;
5745
5746 case TGSI_OPCODE_ISSG:
5747 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT);
5748 break;
5749
5750 case TGSI_OPCODE_TEX2:
5751 /* simple texture lookup */
5752 /* src[0] = texcoord */
5753 /* src[1] = compare */
5754 /* src[2] = sampler unit */
5755 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
5756 break;
5757 case TGSI_OPCODE_TXB2:
5758 /* simple texture lookup */
5759 /* src[0] = texcoord */
5760 /* src[1] = bias */
5761 /* src[2] = sampler unit */
5762 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
5763 break;
5764 case TGSI_OPCODE_TXL2:
5765 /* simple texture lookup */
5766 /* src[0] = texcoord */
5767 /* src[1] = lod */
5768 /* src[2] = sampler unit */
5769 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
5770 break;
5771
5772 case TGSI_OPCODE_IBFE:
5773 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT);
5774 break;
5775 case TGSI_OPCODE_UBFE:
5776 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT);
5777 break;
5778 case TGSI_OPCODE_BFI:
5779 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT);
5780 break;
5781 case TGSI_OPCODE_BREV:
5782 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT);
5783 break;
5784 case TGSI_OPCODE_POPC:
5785 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT);
5786 break;
5787 case TGSI_OPCODE_LSB:
5788 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_UINT);
5789 break;
5790 case TGSI_OPCODE_IMSB:
5791 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT);
5792 break;
5793 case TGSI_OPCODE_UMSB:
5794 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_UINT);
5795 break;
5796
5797 case TGSI_OPCODE_F2D:
5798 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
5799 break;
5800
5801 case TGSI_OPCODE_D2F:
5802 exec_64_2_t(mach, inst, micro_d2f);
5803 break;
5804
5805 case TGSI_OPCODE_DABS:
5806 exec_double_unary(mach, inst, micro_dabs);
5807 break;
5808
5809 case TGSI_OPCODE_DNEG:
5810 exec_double_unary(mach, inst, micro_dneg);
5811 break;
5812
5813 case TGSI_OPCODE_DADD:
5814 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
5815 break;
5816
5817 case TGSI_OPCODE_DDIV:
5818 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
5819 break;
5820
5821 case TGSI_OPCODE_DMUL:
5822 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
5823 break;
5824
5825 case TGSI_OPCODE_DMAX:
5826 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
5827 break;
5828
5829 case TGSI_OPCODE_DMIN:
5830 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
5831 break;
5832
5833 case TGSI_OPCODE_DSLT:
5834 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
5835 break;
5836
5837 case TGSI_OPCODE_DSGE:
5838 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
5839 break;
5840
5841 case TGSI_OPCODE_DSEQ:
5842 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
5843 break;
5844
5845 case TGSI_OPCODE_DSNE:
5846 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
5847 break;
5848
5849 case TGSI_OPCODE_DRCP:
5850 exec_double_unary(mach, inst, micro_drcp);
5851 break;
5852
5853 case TGSI_OPCODE_DSQRT:
5854 exec_double_unary(mach, inst, micro_dsqrt);
5855 break;
5856
5857 case TGSI_OPCODE_DRSQ:
5858 exec_double_unary(mach, inst, micro_drsq);
5859 break;
5860
5861 case TGSI_OPCODE_DMAD:
5862 exec_double_trinary(mach, inst, micro_dmad);
5863 break;
5864
5865 case TGSI_OPCODE_DFRAC:
5866 exec_double_unary(mach, inst, micro_dfrac);
5867 break;
5868
5869 case TGSI_OPCODE_DFLR:
5870 exec_double_unary(mach, inst, micro_dflr);
5871 break;
5872
5873 case TGSI_OPCODE_DLDEXP:
5874 exec_dldexp(mach, inst);
5875 break;
5876
5877 case TGSI_OPCODE_DFRACEXP:
5878 exec_dfracexp(mach, inst);
5879 break;
5880
5881 case TGSI_OPCODE_I2D:
5882 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT);
5883 break;
5884
5885 case TGSI_OPCODE_D2I:
5886 exec_64_2_t(mach, inst, micro_d2i);
5887 break;
5888
5889 case TGSI_OPCODE_U2D:
5890 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_FLOAT);
5891 break;
5892
5893 case TGSI_OPCODE_D2U:
5894 exec_64_2_t(mach, inst, micro_d2u);
5895 break;
5896
5897 case TGSI_OPCODE_LOAD:
5898 exec_load(mach, inst);
5899 break;
5900
5901 case TGSI_OPCODE_STORE:
5902 exec_store(mach, inst);
5903 break;
5904
5905 case TGSI_OPCODE_ATOMUADD:
5906 case TGSI_OPCODE_ATOMXCHG:
5907 case TGSI_OPCODE_ATOMCAS:
5908 case TGSI_OPCODE_ATOMAND:
5909 case TGSI_OPCODE_ATOMOR:
5910 case TGSI_OPCODE_ATOMXOR:
5911 case TGSI_OPCODE_ATOMUMIN:
5912 case TGSI_OPCODE_ATOMUMAX:
5913 case TGSI_OPCODE_ATOMIMIN:
5914 case TGSI_OPCODE_ATOMIMAX:
5915 case TGSI_OPCODE_ATOMFADD:
5916 exec_atomop(mach, inst);
5917 break;
5918
5919 case TGSI_OPCODE_RESQ:
5920 exec_resq(mach, inst);
5921 break;
5922 case TGSI_OPCODE_BARRIER:
5923 case TGSI_OPCODE_MEMBAR:
5924 return TRUE;
5925 break;
5926
5927 case TGSI_OPCODE_I64ABS:
5928 exec_double_unary(mach, inst, micro_i64abs);
5929 break;
5930
5931 case TGSI_OPCODE_I64SSG:
5932 exec_double_unary(mach, inst, micro_i64sgn);
5933 break;
5934
5935 case TGSI_OPCODE_I64NEG:
5936 exec_double_unary(mach, inst, micro_i64neg);
5937 break;
5938
5939 case TGSI_OPCODE_U64SEQ:
5940 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
5941 break;
5942
5943 case TGSI_OPCODE_U64SNE:
5944 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
5945 break;
5946
5947 case TGSI_OPCODE_I64SLT:
5948 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
5949 break;
5950 case TGSI_OPCODE_U64SLT:
5951 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
5952 break;
5953
5954 case TGSI_OPCODE_I64SGE:
5955 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
5956 break;
5957 case TGSI_OPCODE_U64SGE:
5958 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
5959 break;
5960
5961 case TGSI_OPCODE_I64MIN:
5962 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
5963 break;
5964 case TGSI_OPCODE_U64MIN:
5965 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
5966 break;
5967 case TGSI_OPCODE_I64MAX:
5968 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
5969 break;
5970 case TGSI_OPCODE_U64MAX:
5971 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
5972 break;
5973 case TGSI_OPCODE_U64ADD:
5974 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
5975 break;
5976 case TGSI_OPCODE_U64MUL:
5977 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
5978 break;
5979 case TGSI_OPCODE_U64SHL:
5980 exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
5981 break;
5982 case TGSI_OPCODE_I64SHR:
5983 exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
5984 break;
5985 case TGSI_OPCODE_U64SHR:
5986 exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
5987 break;
5988 case TGSI_OPCODE_U64DIV:
5989 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
5990 break;
5991 case TGSI_OPCODE_I64DIV:
5992 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
5993 break;
5994 case TGSI_OPCODE_U64MOD:
5995 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
5996 break;
5997 case TGSI_OPCODE_I64MOD:
5998 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
5999 break;
6000
6001 case TGSI_OPCODE_F2U64:
6002 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
6003 break;
6004
6005 case TGSI_OPCODE_F2I64:
6006 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
6007 break;
6008
6009 case TGSI_OPCODE_U2I64:
6010 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
6011 break;
6012 case TGSI_OPCODE_I2I64:
6013 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
6014 break;
6015
6016 case TGSI_OPCODE_D2U64:
6017 exec_double_unary(mach, inst, micro_d2u64);
6018 break;
6019
6020 case TGSI_OPCODE_D2I64:
6021 exec_double_unary(mach, inst, micro_d2i64);
6022 break;
6023
6024 case TGSI_OPCODE_U642F:
6025 exec_64_2_t(mach, inst, micro_u642f);
6026 break;
6027 case TGSI_OPCODE_I642F:
6028 exec_64_2_t(mach, inst, micro_i642f);
6029 break;
6030
6031 case TGSI_OPCODE_U642D:
6032 exec_double_unary(mach, inst, micro_u642d);
6033 break;
6034 case TGSI_OPCODE_I642D:
6035 exec_double_unary(mach, inst, micro_i642d);
6036 break;
6037 case TGSI_OPCODE_INTERP_SAMPLE:
6038 exec_interp_at_sample(mach, inst);
6039 break;
6040 case TGSI_OPCODE_INTERP_OFFSET:
6041 exec_interp_at_offset(mach, inst);
6042 break;
6043 case TGSI_OPCODE_INTERP_CENTROID:
6044 exec_interp_at_centroid(mach, inst);
6045 break;
6046 default:
6047 assert( 0 );
6048 }
6049 return FALSE;
6050 }
6051
6052 static void
tgsi_exec_machine_setup_masks(struct tgsi_exec_machine * mach)6053 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
6054 {
6055 uint default_mask = 0xf;
6056
6057 mach->KillMask = 0;
6058 mach->OutputVertexOffset = 0;
6059
6060 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
6061 for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
6062 mach->OutputPrimCount[i] = 0;
6063 mach->Primitives[i][0] = 0;
6064 }
6065 /* GS runs on a single primitive for now */
6066 default_mask = 0x1;
6067 }
6068
6069 if (mach->NonHelperMask == 0)
6070 mach->NonHelperMask = default_mask;
6071 mach->CondMask = default_mask;
6072 mach->LoopMask = default_mask;
6073 mach->ContMask = default_mask;
6074 mach->FuncMask = default_mask;
6075 mach->ExecMask = default_mask;
6076
6077 mach->Switch.mask = default_mask;
6078
6079 assert(mach->CondStackTop == 0);
6080 assert(mach->LoopStackTop == 0);
6081 assert(mach->ContStackTop == 0);
6082 assert(mach->SwitchStackTop == 0);
6083 assert(mach->BreakStackTop == 0);
6084 assert(mach->CallStackTop == 0);
6085 }
6086
6087 /**
6088 * Run TGSI interpreter.
6089 * \return bitmask of "alive" quad components
6090 */
6091 uint
tgsi_exec_machine_run(struct tgsi_exec_machine * mach,int start_pc)6092 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
6093 {
6094 uint i;
6095
6096 mach->pc = start_pc;
6097
6098 if (!start_pc) {
6099 tgsi_exec_machine_setup_masks(mach);
6100
6101 /* execute declarations (interpolants) */
6102 for (i = 0; i < mach->NumDeclarations; i++) {
6103 exec_declaration( mach, mach->Declarations+i );
6104 }
6105 }
6106
6107 {
6108 #if DEBUG_EXECUTION
6109 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS];
6110 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6111 uint inst = 1;
6112
6113 if (!start_pc) {
6114 memset(mach->Temps, 0, sizeof(temps));
6115 if (mach->Outputs)
6116 memset(mach->Outputs, 0, sizeof(outputs));
6117 memset(temps, 0, sizeof(temps));
6118 memset(outputs, 0, sizeof(outputs));
6119 }
6120 #endif
6121
6122 /* execute instructions, until pc is set to -1 */
6123 while (mach->pc != -1) {
6124 boolean barrier_hit;
6125 #if DEBUG_EXECUTION
6126 uint i;
6127
6128 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6129 #endif
6130
6131 assert(mach->pc < (int) mach->NumInstructions);
6132 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6133
6134 /* for compute shaders if we hit a barrier return now for later rescheduling */
6135 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6136 return 0;
6137
6138 #if DEBUG_EXECUTION
6139 for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
6140 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6141 uint j;
6142
6143 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6144 debug_printf("TEMP[%2u] = ", i);
6145 for (j = 0; j < 4; j++) {
6146 if (j > 0) {
6147 debug_printf(" ");
6148 }
6149 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6150 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6151 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6152 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6153 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6154 }
6155 }
6156 }
6157 if (mach->Outputs) {
6158 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6159 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6160 uint j;
6161
6162 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6163 debug_printf("OUT[%2u] = ", i);
6164 for (j = 0; j < 4; j++) {
6165 if (j > 0) {
6166 debug_printf(" ");
6167 }
6168 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6169 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6170 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6171 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6172 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6173 }
6174 }
6175 }
6176 }
6177 #endif
6178 }
6179 }
6180
6181 #if 0
6182 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6183 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6184 /*
6185 * Scale back depth component.
6186 */
6187 for (i = 0; i < 4; i++)
6188 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6189 }
6190 #endif
6191
6192 /* Strictly speaking, these assertions aren't really needed but they
6193 * can potentially catch some bugs in the control flow code.
6194 */
6195 assert(mach->CondStackTop == 0);
6196 assert(mach->LoopStackTop == 0);
6197 assert(mach->ContStackTop == 0);
6198 assert(mach->SwitchStackTop == 0);
6199 assert(mach->BreakStackTop == 0);
6200 assert(mach->CallStackTop == 0);
6201
6202 return ~mach->KillMask;
6203 }
6204