nir_opcodes.py - OpenGrok cross reference for /third_party/mesa3d/src/compiler/nir/nir

Lines Matching +full:vc4 +full:- +full:rules
39       - name is the name of the opcode (prepend nir_op_ for the enum name)
40       - all types are strings that get nir_type_ prepended to them
41       - input_types is a list of types
42       - is_conversion is true if this opcode represents a type conversion
43       - algebraic_properties is a space-separated string, where nir_op_is_ is
45       - const_expr is an expression or series of statements that computes the
49       src(N-1), where N is the number of arguments.  The output of the
50       expression should be stored in the dst variable.  Per-component input
51       and output variables will be scalars and non-per-component input and
58       For per-component instructions, the entire expression will be
59       executed once for each component.  For non-per-component
64       per-component instructions and "dst.x = dst.y = ... = <expression>"
65       for non-per-component instructions.
146 # For 2-source operations, this is just mathematical commutativity.  Some
147 # 3-source operations, like ffma, are only commutative in the first two
197 unop("ineg", tint, "-src0")
198 unop("fneg", tfloat, "-src0")
201 # nir_op_fsign roughly implements the OpenGL / Vulkan rules for sign(float).
204 #    Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
216 …               "(isnan(src0) ? 0.0  : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0  : -1.0 )) : " +
217 …                  "(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"))
218 unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
219 unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
298 # Unary floating-point rounding operations.
304 unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
307 unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half…
437    dst |= ((src0 >> bit) & 1) << (31 - bit);
448 dst = -1;
449 for (int bit = bit_size - 1; bit >= 0; bit--) {
458 dst = -1;
469 for (bit = bit_size - 1; bit >= 0; bit--) {
473 dst = (unsigned)(31 - bit);
477 dst = -1;
478 for (int bit = 31; bit >= 0; bit--) {
491 dst = -1;
492 if (src0 != 0 && src0 != -1) {
507 dst = -1;
528 if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; }
529 if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; }
531 if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; }
532 if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; }
533 if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; }
640          (src0 - src1 < src0 ? u_intN_max(bit_size) : src0 - src1) :
641          (src0 < src0 - src1 ? u_intN_min(bit_size) : src0 - src1)
643 binop("usub_sat", tuint, "", "src0 < src1 ? 0 : src0 - src1")
650       dst = _mesa_double_to_float_rtz((double)src0 - (double)src1);
652    dst = src0 - src1;
655 binop("isub", tint, "", "src0 - src1")
657               src1 > src0 ? (uint64_t) src1 - (uint64_t) src0
658                           : (uint64_t) src0 - (uint64_t) src1
660 binop("uabs_usub", tuint, "", "(src1 > src0) ? (src1 - src0) : (src0 - src1)")
672 # low 32-bits of signed/unsigned integer multiply
674    /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
684 # high 32-bits of signed integer multiply
687    /* We need to do a full 128-bit x 128-bit multiply in order for the sign
688     * extension to work properly.  The casts are kind-of annoying but needed
707    /* First, sign-extend to 64-bit, then convert to unsigned to prevent
713 # high 32-bits of unsigned integer multiply
716    /* The casts are kind-of annoying but needed to prevent compiler warnings. */
727 # low 32-bits of unsigned integer multiply
729 uint64_t mask = (1 << (bit_size / 2)) - 1;
733 # Multiply 32-bits with low 16-bits.
752 # x + y = x - (x & ~y) + (x & ~y) + y - (~x & y) + (~x & y)
765 # x + y + 1 = x + (~x & y) - (~x & y) + y + (x & ~y) - (x & ~y) + 1
766 #           =      (x | y) - (~x & y) +      (x | y) - (x & ~y) + 1
767 #           = 2 *  (x | y) - ((~x & y) +               (x & ~y)) + 1
768 #           =     ((x | y) << 1) - (x ^ y) + 1
772 # (x + y + 1) >> 1 = (x | y) + ((-(x ^ y) + 1) >> 1)
773 #                  = (x | y) -  ((x ^ y)      >> 1)
774 binop("irhadd", tint, _2src_commutative, "(src0 | src1) - ((src0 ^ src1) >> 1)")
775 binop("urhadd", tuint, _2src_commutative, "(src0 | src1) - ((src0 ^ src1) >> 1)")
781 # SPIR-V.  The irem opcode implements the standard C/C++ signed "%"
791 binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
792 binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / src1)")
799 # these integer-aware comparisons return a boolean (0 or ~0)
812 # integer-aware GLSL-style comparisons that compare floats and ints
823 # non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
830 # These comparisons for integer-less hardware return 1.0 and 0.0 for true
838 # SPIRV shifts are undefined for shift-operands >= bitsize,
842        "(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))")
844        "src0 >> (src1 & (sizeof(src0) * 8 - 1))")
846        "src0 >> (src1 & (sizeof(src0) * 8 - 1))")
849    uint32_t rotate_mask = sizeof(src0) * 8 - 1;
851          (src0 >> (-src1 & rotate_mask));
854    uint32_t rotate_mask = sizeof(src0) * 8 - 1;
856          (src0 << (-src1 & rotate_mask));
910 dst = ((1u << bits) - 1) << offset;
920 # Combines the first component of each input to make a 2-component vector.
963 triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
970 # A vector conditional select instruction (like ?:, but operating per-
1018    dst = (base << (32 - bits - offset)) >> (32 - bits);
1031    dst = (base << (32 - bits - offset)) >> (32 - bits);
1047    dst = (base >> offset) & ((1ull << bits) - 1);
1059    dst = (base << (32 - offset - bits)) >> (32 - bits); /* use sign-extending shift */
1065 # The first two sources contain packed 8-bit unsigned integers, the instruction
1067 # There is also a third source which is a 32-bit unsigned integer and added to the result.
1080         (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) +
1081         (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
1082         (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
1083         (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
1086 # Combines the first component of each input to make a 3-component vector.
1110    unsigned mask = ((1ull << bits) - 1) << offset;
1178 # ir3-specific instruction that maps directly to mul-add shift high mix,
1186 # ir3-specific instruction that maps directly to ir3 mad.s24.
1192 # r600-specific instruction that evaluates unnormalized cube texture coordinates
1207       dst.y = -src0.z; dst.x = -src0.y; dst.w = 0;
1210       dst.y = src0.z; dst.x = -src0.y; dst.w = 1;
1216       dst.y = src0.x; dst.x = -src0.z; dst.w = 3;
1219       dst.y = src0.x; dst.x = -src0.y; dst.w = 4;
1222       dst.y = -src0.x; dst.x = -src0.y; dst.w = 5;
1260 # vc4-specific opcodes
1277       dst |= (src0_chan - src1_chan) << i;
1307 # Mali-specific opcodes
1308 unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)"))
1328 # components are sign-extended to 32-bits, and a dot-product is performed on
1329 # the resulting vectors.  src2 is added to the result of the dot-product.
1360 # src2 is an int32.  The 8-bit components are extended to 32-bits, and a
1361 # dot-product is performed on the resulting vectors.  src2 is added to the
1362 # result of the dot-product.
1365 # and source 1 mean that this opcode is not 2-source commutative
1380 # Like sdot_4x8_iadd, but the result is clamped to the range [-0x80000000, 0x7fffffff].
1414 # Like sudot_4x8_iadd, but the result is clamped to the range [-0x80000000, 0x7fffffff].
1417 # and source 1 mean that this opcode is not 2-source commutative
1435 # components are sign-extended to 32-bits, and a dot-product is performed on
1436 # the resulting vectors.  src2 is added to the result of the dot-product.
1458 # Like sdot_2x16_iadd, but the result is clamped to the range [-0x80000000, 0x7fffffff].