/* -*- c++ -*- */
/*
 * Copyright © 2010-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

25 #pragma once
26 
27 #include "elk_ir_vec4.h"
28 #include "elk_ir_allocator.h"
29 
30 namespace elk {
31    /**
32     * Toolbox to assemble a VEC4 IR program out of individual instructions.
33     *
34     * This object is meant to have an interface consistent with
35     * elk::fs_builder.  They cannot be fully interchangeable because
36     * elk::fs_builder generates scalar code while elk::vec4_builder generates
37     * vector code.
38     */
39    class vec4_builder {
40    public:
41       /** Type used in this IR to represent a source of an instruction. */
42       typedef elk::src_reg src_reg;
43 
44       /** Type used in this IR to represent the destination of an instruction. */
45       typedef elk::dst_reg dst_reg;
46 
47       /** Type used in this IR to represent an instruction. */
48       typedef vec4_instruction instruction;
49 
50       /**
51        * Construct a vec4_builder that inserts instructions into \p shader.
52        */
53       vec4_builder(elk_backend_shader *shader, unsigned dispatch_width = 8) :
shader(shader)54          shader(shader), block(NULL), cursor(NULL),
55          _dispatch_width(dispatch_width), _group(0),
56          force_writemask_all(false),
57          annotation()
58       {
59       }
60 
61       /**
62        * Construct a vec4_builder that inserts instructions into \p shader
63        * before instruction \p inst in basic block \p block.  The default
64        * execution controls and debug annotation are initialized from the
65        * instruction passed as argument.
66        */
vec4_builder(elk_backend_shader * shader,elk_bblock_t * block,instruction * inst)67       vec4_builder(elk_backend_shader *shader, elk_bblock_t *block, instruction *inst) :
68          shader(shader), block(block), cursor(inst),
69          _dispatch_width(inst->exec_size), _group(inst->group),
70          force_writemask_all(inst->force_writemask_all)
71       {
72          annotation.str = inst->annotation;
73          annotation.ir = inst->ir;
74       }
75 
76       /**
77        * Construct a vec4_builder that inserts instructions before \p cursor
78        * in basic block \p block, inheriting other code generation parameters
79        * from this.
80        */
81       vec4_builder
at(elk_bblock_t * block,exec_node * cursor)82       at(elk_bblock_t *block, exec_node *cursor) const
83       {
84          vec4_builder bld = *this;
85          bld.block = block;
86          bld.cursor = cursor;
87          return bld;
88       }
89 
90       /**
91        * Construct a vec4_builder appending instructions at the end of the
92        * instruction list of the shader, inheriting other code generation
93        * parameters from this.
94        */
95       vec4_builder
at_end()96       at_end() const
97       {
98          return at(NULL, (exec_node *)&shader->instructions.tail_sentinel);
99       }
100 
101       /**
102        * Construct a builder specifying the default SIMD width and group of
103        * channel enable signals, inheriting other code generation parameters
104        * from this.
105        *
106        * \p n gives the default SIMD width, \p i gives the slot group used for
107        * predication and control flow masking in multiples of \p n channels.
108        */
109       vec4_builder
group(unsigned n,unsigned i)110       group(unsigned n, unsigned i) const
111       {
112          assert(force_writemask_all ||
113                 (n <= dispatch_width() && i < dispatch_width() / n));
114          vec4_builder bld = *this;
115          bld._dispatch_width = n;
116          bld._group += i * n;
117          return bld;
118       }
119 
120       /**
121        * Construct a builder with per-channel control flow execution masking
122        * disabled if \p b is true.  If control flow execution masking is
123        * already disabled this has no effect.
124        */
125       vec4_builder
126       exec_all(bool b = true) const
127       {
128          vec4_builder bld = *this;
129          if (b)
130             bld.force_writemask_all = true;
131          return bld;
132       }
133 
134       /**
135        * Construct a builder with the given debug annotation info.
136        */
137       vec4_builder
138       annotate(const char *str, const void *ir = NULL) const
139       {
140          vec4_builder bld = *this;
141          bld.annotation.str = str;
142          bld.annotation.ir = ir;
143          return bld;
144       }
145 
146       /**
147        * Get the SIMD width in use.
148        */
149       unsigned
dispatch_width()150       dispatch_width() const
151       {
152          return _dispatch_width;
153       }
154 
155       /**
156        * Get the channel group in use.
157        */
158       unsigned
group()159       group() const
160       {
161          return _group;
162       }
163 
164       /**
165        * Allocate a virtual register of natural vector size (four for this IR)
166        * and SIMD width.  \p n gives the amount of space to allocate in
167        * dispatch_width units (which is just enough space for four logical
168        * components in this IR).
169        */
170       dst_reg
171       vgrf(enum elk_reg_type type, unsigned n = 1) const
172       {
173          assert(dispatch_width() <= 32);
174 
175          if (n > 0)
176             return retype(dst_reg(VGRF, shader->alloc.allocate(
177                                      n * DIV_ROUND_UP(type_sz(type), 4))),
178                            type);
179          else
180             return retype(null_reg_ud(), type);
181       }
182 
183       /**
184        * Create a null register of floating type.
185        */
186       dst_reg
null_reg_f()187       null_reg_f() const
188       {
189          return dst_reg(retype(elk_null_vec(dispatch_width()),
190                                ELK_REGISTER_TYPE_F));
191       }
192 
193       /**
194        * Create a null register of signed integer type.
195        */
196       dst_reg
null_reg_d()197       null_reg_d() const
198       {
199          return dst_reg(retype(elk_null_vec(dispatch_width()),
200                                ELK_REGISTER_TYPE_D));
201       }
202 
203       /**
204        * Create a null register of unsigned integer type.
205        */
206       dst_reg
null_reg_ud()207       null_reg_ud() const
208       {
209          return dst_reg(retype(elk_null_vec(dispatch_width()),
210                                ELK_REGISTER_TYPE_UD));
211       }
212 
213       /**
214        * Insert an instruction into the program.
215        */
216       instruction *
emit(const instruction & inst)217       emit(const instruction &inst) const
218       {
219          return emit(new(shader->mem_ctx) instruction(inst));
220       }
221 
222       /**
223        * Create and insert a nullary control instruction into the program.
224        */
225       instruction *
emit(enum elk_opcode opcode)226       emit(enum elk_opcode opcode) const
227       {
228          return emit(instruction(opcode));
229       }
230 
231       /**
232        * Create and insert a nullary instruction into the program.
233        */
234       instruction *
emit(enum elk_opcode opcode,const dst_reg & dst)235       emit(enum elk_opcode opcode, const dst_reg &dst) const
236       {
237          return emit(instruction(opcode, dst));
238       }
239 
240       /**
241        * Create and insert a unary instruction into the program.
242        */
243       instruction *
emit(enum elk_opcode opcode,const dst_reg & dst,const src_reg & src0)244       emit(enum elk_opcode opcode, const dst_reg &dst, const src_reg &src0) const
245       {
246          switch (opcode) {
247          case ELK_SHADER_OPCODE_RCP:
248          case ELK_SHADER_OPCODE_RSQ:
249          case ELK_SHADER_OPCODE_SQRT:
250          case ELK_SHADER_OPCODE_EXP2:
251          case ELK_SHADER_OPCODE_LOG2:
252          case ELK_SHADER_OPCODE_SIN:
253          case ELK_SHADER_OPCODE_COS:
254             return fix_math_instruction(
255                emit(instruction(opcode, dst,
256                                 fix_math_operand(src0))));
257 
258          default:
259             return emit(instruction(opcode, dst, src0));
260          }
261       }
262 
263       /**
264        * Create and insert a binary instruction into the program.
265        */
266       instruction *
emit(enum elk_opcode opcode,const dst_reg & dst,const src_reg & src0,const src_reg & src1)267       emit(enum elk_opcode opcode, const dst_reg &dst, const src_reg &src0,
268            const src_reg &src1) const
269       {
270          switch (opcode) {
271          case ELK_SHADER_OPCODE_POW:
272          case ELK_SHADER_OPCODE_INT_QUOTIENT:
273          case ELK_SHADER_OPCODE_INT_REMAINDER:
274             return fix_math_instruction(
275                emit(instruction(opcode, dst,
276                                 fix_math_operand(src0),
277                                 fix_math_operand(src1))));
278 
279          default:
280             return emit(instruction(opcode, dst, src0, src1));
281          }
282       }
283 
284       /**
285        * Create and insert a ternary instruction into the program.
286        */
287       instruction *
emit(enum elk_opcode opcode,const dst_reg & dst,const src_reg & src0,const src_reg & src1,const src_reg & src2)288       emit(enum elk_opcode opcode, const dst_reg &dst, const src_reg &src0,
289            const src_reg &src1, const src_reg &src2) const
290       {
291          switch (opcode) {
292          case ELK_OPCODE_BFE:
293          case ELK_OPCODE_BFI2:
294          case ELK_OPCODE_MAD:
295          case ELK_OPCODE_LRP:
296             return emit(instruction(opcode, dst,
297                                     fix_3src_operand(src0),
298                                     fix_3src_operand(src1),
299                                     fix_3src_operand(src2)));
300 
301          default:
302             return emit(instruction(opcode, dst, src0, src1, src2));
303          }
304       }
305 
306       /**
307        * Insert a preallocated instruction into the program.
308        */
309       instruction *
emit(instruction * inst)310       emit(instruction *inst) const
311       {
312          inst->exec_size = dispatch_width();
313          inst->group = group();
314          inst->force_writemask_all = force_writemask_all;
315          inst->size_written = inst->exec_size * type_sz(inst->dst.type);
316          inst->annotation = annotation.str;
317          inst->ir = annotation.ir;
318 
319          if (block)
320             static_cast<instruction *>(cursor)->insert_before(block, inst);
321          else
322             cursor->insert_before(inst);
323 
324          return inst;
325       }
326 
327       /**
328        * Select \p src0 if the comparison of both sources with the given
329        * conditional mod evaluates to true, otherwise select \p src1.
330        *
331        * Generally useful to get the minimum or maximum of two values.
332        */
333       instruction *
emit_minmax(const dst_reg & dst,const src_reg & src0,const src_reg & src1,elk_conditional_mod mod)334       emit_minmax(const dst_reg &dst, const src_reg &src0,
335                   const src_reg &src1, elk_conditional_mod mod) const
336       {
337          assert(mod == ELK_CONDITIONAL_GE || mod == ELK_CONDITIONAL_L);
338 
339          return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0),
340                                      fix_unsigned_negate(src1)));
341       }
342 
343       /**
344        * Copy any live channel from \p src to the first channel of the result.
345        */
346       src_reg
emit_uniformize(const src_reg & src)347       emit_uniformize(const src_reg &src) const
348       {
349          const vec4_builder ubld = exec_all();
350          const dst_reg chan_index =
351             writemask(vgrf(ELK_REGISTER_TYPE_UD), WRITEMASK_X);
352          const dst_reg dst = vgrf(src.type);
353 
354          ubld.emit(ELK_SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
355          ubld.emit(ELK_SHADER_OPCODE_BROADCAST, dst, src, src_reg(chan_index));
356 
357          return src_reg(dst);
358       }
359 
360       /**
361        * Assorted arithmetic ops.
362        * @{
363        */
364 #define ALU1(op)                                        \
365       instruction *                                     \
366       op(const dst_reg &dst, const src_reg &src0) const \
367       {                                                 \
368          return emit(ELK_OPCODE_##op, dst, src0);       \
369       }
370 
371 #define ALU2(op)                                                        \
372       instruction *                                                     \
373       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
374       {                                                                 \
375          return emit(ELK_OPCODE_##op, dst, src0, src1);                 \
376       }
377 
378 #define ALU2_ACC(op)                                                    \
379       instruction *                                                     \
380       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1) const \
381       {                                                                 \
382          instruction *inst = emit(ELK_OPCODE_##op, dst, src0, src1);    \
383          inst->writes_accumulator = true;                               \
384          return inst;                                                   \
385       }
386 
387 #define ALU3(op)                                                        \
388       instruction *                                                     \
389       op(const dst_reg &dst, const src_reg &src0, const src_reg &src1,  \
390          const src_reg &src2) const                                     \
391       {                                                                 \
392          return emit(ELK_OPCODE_##op, dst, src0, src1, src2);           \
393       }
394 
395       ALU2(ADD)
ALU2_ACC(ADDC)396       ALU2_ACC(ADDC)
397       ALU2(AND)
398       ALU2(ASR)
399       ALU2(AVG)
400       ALU3(BFE)
401       ALU2(BFI1)
402       ALU3(BFI2)
403       ALU1(BFREV)
404       ALU1(CBIT)
405       ALU3(CSEL)
406       ALU1(DIM)
407       ALU2(DP2)
408       ALU2(DP3)
409       ALU2(DP4)
410       ALU2(DPH)
411       ALU1(F16TO32)
412       ALU1(F32TO16)
413       ALU1(FBH)
414       ALU1(FBL)
415       ALU1(FRC)
416       ALU2(LINE)
417       ALU1(LZD)
418       ALU2(MAC)
419       ALU2_ACC(MACH)
420       ALU3(MAD)
421       ALU1(MOV)
422       ALU2(MUL)
423       ALU1(NOT)
424       ALU2(OR)
425       ALU2(PLN)
426       ALU1(RNDD)
427       ALU1(RNDE)
428       ALU1(RNDU)
429       ALU1(RNDZ)
430       ALU2(SAD2)
431       ALU2_ACC(SADA2)
432       ALU2(SEL)
433       ALU2(SHL)
434       ALU2(SHR)
435       ALU2_ACC(SUBB)
436       ALU2(XOR)
437 
438 #undef ALU3
439 #undef ALU2_ACC
440 #undef ALU2
441 #undef ALU1
442       /** @} */
443 
444       /**
445        * CMP: Sets the low bit of the destination channels with the result
446        * of the comparison, while the upper bits are undefined, and updates
447        * the flag register with the packed 16 bits of the result.
448        */
449       instruction *
450       CMP(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
451           elk_conditional_mod condition) const
452       {
453          /* Take the instruction:
454           *
455           * CMP null<d> src0<f> src1<f>
456           *
457           * Original gfx4 does type conversion to the destination type
458           * before comparison, producing garbage results for floating
459           * point comparisons.
460           *
461           * The destination type doesn't matter on newer generations,
462           * so we set the type to match src0 so we can compact the
463           * instruction.
464           */
465          return set_condmod(condition,
466                             emit(ELK_OPCODE_CMP, retype(dst, src0.type),
467                                  fix_unsigned_negate(src0),
468                                  fix_unsigned_negate(src1)));
469       }
470 
471       /**
472        * CMPN: Behaves like CMP, but produces true if src1 is NaN.
473        */
474       instruction *
CMPN(const dst_reg & dst,const src_reg & src0,const src_reg & src1,elk_conditional_mod condition)475       CMPN(const dst_reg &dst, const src_reg &src0, const src_reg &src1,
476           elk_conditional_mod condition) const
477       {
478          /* Take the instruction:
479           *
480           * CMPN null<d> src0<f> src1<f>
481           *
482           * Original gfx4 does type conversion to the destination type
483           * before comparison, producing garbage results for floating
484           * point comparisons.
485           *
486           * The destination type doesn't matter on newer generations,
487           * so we set the type to match src0 so we can compact the
488           * instruction.
489           */
490          return set_condmod(condition,
491                             emit(ELK_OPCODE_CMPN, retype(dst, src0.type),
492                                  fix_unsigned_negate(src0),
493                                  fix_unsigned_negate(src1)));
494       }
495 
496       /**
497        * Gfx4 predicated IF.
498        */
499       instruction *
IF(elk_predicate predicate)500       IF(elk_predicate predicate) const
501       {
502          return set_predicate(predicate, emit(ELK_OPCODE_IF));
503       }
504 
505       /**
506        * Gfx6 IF with embedded comparison.
507        */
508       instruction *
IF(const src_reg & src0,const src_reg & src1,elk_conditional_mod condition)509       IF(const src_reg &src0, const src_reg &src1,
510          elk_conditional_mod condition) const
511       {
512          assert(shader->devinfo->ver == 6);
513          return set_condmod(condition,
514                             emit(ELK_OPCODE_IF,
515                                  null_reg_d(),
516                                  fix_unsigned_negate(src0),
517                                  fix_unsigned_negate(src1)));
518       }
519 
520       /**
521        * Emit a linear interpolation instruction.
522        */
523       instruction *
LRP(const dst_reg & dst,const src_reg & x,const src_reg & y,const src_reg & a)524       LRP(const dst_reg &dst, const src_reg &x, const src_reg &y,
525           const src_reg &a) const
526       {
527          /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so
528           * we need to reorder the operands.
529           */
530          assert(shader->devinfo->ver >= 6);
531          return emit(ELK_OPCODE_LRP, dst, a, y, x);
532       }
533 
534       elk_backend_shader *shader;
535 
536    protected:
537       /**
538        * Workaround for negation of UD registers.  See comment in
539        * elk_fs_generator::generate_code() for the details.
540        */
541       src_reg
fix_unsigned_negate(const src_reg & src)542       fix_unsigned_negate(const src_reg &src) const
543       {
544          if (src.type == ELK_REGISTER_TYPE_UD && src.negate) {
545             dst_reg temp = vgrf(ELK_REGISTER_TYPE_UD);
546             MOV(temp, src);
547             return src_reg(temp);
548          } else {
549             return src;
550          }
551       }
552 
553       /**
554        * Workaround for register access modes not supported by the ternary
555        * instruction encoding.
556        */
557       src_reg
fix_3src_operand(const src_reg & src)558       fix_3src_operand(const src_reg &src) const
559       {
560          /* Using vec4 uniforms in SIMD4x2 programs is difficult. You'd like to be
561           * able to use vertical stride of zero to replicate the vec4 uniform, like
562           *
563           *    g3<0;4,1>:f - [0, 4][1, 5][2, 6][3, 7]
564           *
565           * But you can't, since vertical stride is always four in three-source
566           * instructions. Instead, insert a MOV instruction to do the replication so
567           * that the three-source instruction can consume it.
568           */
569 
570          /* The MOV is only needed if the source is a uniform or immediate. */
571          if (src.file != UNIFORM && src.file != IMM)
572             return src;
573 
574          if (src.file == UNIFORM && elk_is_single_value_swizzle(src.swizzle))
575             return src;
576 
577          const dst_reg expanded = vgrf(src.type);
578          emit(ELK_VEC4_OPCODE_UNPACK_UNIFORM, expanded, src);
579          return src_reg(expanded);
580       }
581 
582       /**
583        * Workaround for register access modes not supported by the math
584        * instruction.
585        */
586       src_reg
fix_math_operand(const src_reg & src)587       fix_math_operand(const src_reg &src) const
588       {
589          /* The gfx6 math instruction ignores the source modifiers --
590           * swizzle, abs, negate, and at least some parts of the register
591           * region description.
592           *
593           * Rather than trying to enumerate all these cases, *always* expand the
594           * operand to a temp GRF for gfx6.
595           *
596           * For gfx7, keep the operand as-is, except if immediate, which gfx7 still
597           * can't use.
598           */
599          if (shader->devinfo->ver == 6 ||
600              (shader->devinfo->ver == 7 && src.file == IMM)) {
601             const dst_reg tmp = vgrf(src.type);
602             MOV(tmp, src);
603             return src_reg(tmp);
604          } else {
605             return src;
606          }
607       }
608 
609       /**
610        * Workaround other weirdness of the math instruction.
611        */
612       instruction *
fix_math_instruction(instruction * inst)613       fix_math_instruction(instruction *inst) const
614       {
615          if (shader->devinfo->ver == 6 &&
616              inst->dst.writemask != WRITEMASK_XYZW) {
617             const dst_reg tmp = vgrf(inst->dst.type);
618             MOV(inst->dst, src_reg(tmp));
619             inst->dst = tmp;
620 
621          } else if (shader->devinfo->ver < 6) {
622             const unsigned sources = (inst->src[1].file == BAD_FILE ? 1 : 2);
623             inst->base_mrf = 1;
624             inst->mlen = sources;
625          }
626 
627          return inst;
628       }
629 
630       elk_bblock_t *block;
631       exec_node *cursor;
632 
633       unsigned _dispatch_width;
634       unsigned _group;
635       bool force_writemask_all;
636 
637       /** Debug annotation info. */
638       struct {
639          const char *str;
640          const void *ir;
641       } annotation;
642    };
643 }
644