nir_opcodes.py - OpenGrok cross reference for /third_party/mesa3d/src/compiler/nir/nir

Lines Matching +full:vc4 +full:- +full:rules
39       - name is the name of the opcode (prepend nir_op_ for the enum name)
40       - all types are strings that get nir_type_ prepended to them
41       - input_types is a list of types
42       - is_conversion is true if this opcode represents a type conversion
43       - algebraic_properties is a space-separated string, where nir_op_is_ is
45       - const_expr is an expression or series of statements that computes the
49       src(N-1), where N is the number of arguments.  The output of the
50       expression should be stored in the dst variable.  Per-component input
51       and output variables will be scalars and non-per-component input and
58       For per-component instructions, the entire expression will be
59       executed once for each component.  For non-per-component
64       per-component instructions and "dst.x = dst.y = ... = <expression>"
65       for non-per-component instructions.
144 # For 2-source operations, this is just mathematical commutativity.  Some
145 # 3-source operations, like ffma, are only commutative in the first two
196 unop("ineg", tint, "-src0")
197 unop("fneg", tfloat, "-src0")
200 # nir_op_fsign roughly implements the OpenGL / Vulkan rules for sign(float).
203 #    Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
215 …               "(isnan(src0) ? 0.0  : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0  : -1.0 )) : " +
216 …                  "(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"))
217 unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
218 unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
297 # Unary floating-point rounding operations.
303 unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
306 unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half…
448    dst |= ((src0 >> bit) & 1) << (31 - bit);
459 dst = -1;
460 for (int bit = bit_size - 1; bit >= 0; bit--) {
469 dst = -1;
480 for (bit = bit_size - 1; bit >= 0; bit--) {
484 dst = (unsigned)(bit_size - bit - 1);
488 dst = -1;
489 for (int bit = bit_size - 1; bit >= 0; bit--) {
502 dst = -1;
514 dst = -1;
535 if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; }
536 if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; }
538 if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; }
539 if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; }
540 if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; }
647          (src0 - src1 < src0 ? u_intN_max(bit_size) : src0 - src1) :
648          (src0 < src0 - src1 ? u_intN_min(bit_size) : src0 - src1)
650 binop("usub_sat", tuint, "", "src0 < src1 ? 0 : src0 - src1")
657       dst = _mesa_double_to_float_rtz((double)src0 - (double)src1);
659    dst = src0 - src1;
662 binop("isub", tint, "", "src0 - src1")
664               src1 > src0 ? (uint64_t) src1 - (uint64_t) src0
665                           : (uint64_t) src0 - (uint64_t) src1
667 binop("uabs_usub", tuint, "", "(src1 > src0) ? (src1 - src0) : (src0 - src1)")
681 # fmulz(0.0, inf) and fmulz(0.0, nan) must be +/-0.0, even if
693 # low 32-bits of signed/unsigned integer multiply
695    /* Use 64-bit multiplies to prevent overflow of signed arithmetic */
705 # high 32-bits of signed integer multiply
708    /* We need to do a full 128-bit x 128-bit multiply in order for the sign
709     * extension to work properly.  The casts are kind-of annoying but needed
728    /* First, sign-extend to 64-bit, then convert to unsigned to prevent
734 # high 32-bits of unsigned integer multiply
737    /* The casts are kind-of annoying but needed to prevent compiler warnings. */
748 # low 32-bits of unsigned integer multiply
750 uint64_t mask = (1 << (bit_size / 2)) - 1;
754 # Multiply 32-bits with low 16-bits.
773 # x + y = x - (x & ~y) + (x & ~y) + y - (~x & y) + (~x & y)
786 # x + y + 1 = x + (~x & y) - (~x & y) + y + (x & ~y) - (x & ~y) + 1
787 #           =      (x | y) - (~x & y) +      (x | y) - (x & ~y) + 1
788 #           = 2 *  (x | y) - ((~x & y) +               (x & ~y)) + 1
789 #           =     ((x | y) << 1) - (x ^ y) + 1
793 # (x + y + 1) >> 1 = (x | y) + ((-(x ^ y) + 1) >> 1)
794 #                  = (x | y) -  ((x ^ y)      >> 1)
795 binop("irhadd", tint, _2src_commutative, "(src0 | src1) - ((src0 ^ src1) >> 1)")
796 binop("urhadd", tuint, _2src_commutative, "(src0 | src1) - ((src0 ^ src1) >> 1)")
802 # SPIR-V.  The irem opcode implements the standard C/C++ signed "%"
812 binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)")
813 binop("frem", tfloat, "", "src0 - src1 * truncf(src0 / src1)")
820 # these integer-aware comparisons return a boolean (0 or ~0)
833 # integer-aware GLSL-style comparisons that compare floats and ints
844 # non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
851 # These comparisons for integer-less hardware return 1.0 and 0.0 for true
859 # SPIRV shifts are undefined for shift-operands >= bitsize,
863        "(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))")
865        "src0 >> (src1 & (sizeof(src0) * 8 - 1))")
867        "src0 >> (src1 & (sizeof(src0) * 8 - 1))")
870    uint32_t rotate_mask = sizeof(src0) * 8 - 1;
872          (src0 >> (-src1 & rotate_mask));
875    uint32_t rotate_mask = sizeof(src0) * 8 - 1;
877          (src0 << (-src1 & rotate_mask));
931 dst = ((1u << bits) - 1) << offset;
941 # Combines the first component of each input to make a 2-component vector.
985 # ffmaz(0.0, inf, src2) and ffmaz(0.0, nan, src2) must be +/-0.0 + src2, even if
997 triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
1004 # A vector conditional select instruction (like ?:, but operating per-
1052    dst = (base << (32 - bits - offset)) >> (32 - bits);
1065    dst = (base << (32 - bits - offset)) >> (32 - bits);
1081    dst = (base >> offset) & ((1ull << bits) - 1);
1093    dst = (base << (32 - offset - bits)) >> (32 - bits); /* use sign-extending shift */
1099 # The first two sources contain packed 8-bit unsigned integers, the instruction
1101 # There is also a third source which is a 32-bit unsigned integer and added to the result.
1114         (s0_b0 > s1_b0 ? (s0_b0 - s1_b0) : (s1_b0 - s0_b0)) +
1115         (s0_b1 > s1_b1 ? (s0_b1 - s1_b1) : (s1_b1 - s0_b1)) +
1116         (s0_b2 > s1_b2 ? (s0_b2 - s1_b2) : (s1_b2 - s0_b2)) +
1117         (s0_b3 > s1_b3 ? (s0_b3 - s1_b3) : (s1_b3 - s0_b3));
1120 # Combines the first component of each input to make a 3-component vector.
1144    unsigned mask = ((1ull << bits) - 1) << offset;
1212 # ir3-specific instruction that maps directly to mul-add shift high mix,
1220 # ir3-specific instruction that maps directly to ir3 mad.s24.
1226 # r600-specific instruction that evaluates unnormalized cube texture coordinates
1241       dst.y = -src0.z; dst.x = -src0.y; dst.w = 0;
1244       dst.y = src0.z; dst.x = -src0.y; dst.w = 1;
1250       dst.y = src0.x; dst.x = -src0.z; dst.w = 3;
1253       dst.y = src0.x; dst.x = -src0.y; dst.w = 4;
1256       dst.y = -src0.x; dst.x = -src0.y; dst.w = 5;
1294 # vc4-specific opcodes
1311       dst |= (src0_chan - src1_chan) << i;
1341 # Mali-specific opcodes
1342 unop("fsat_signed_mali", tfloat, ("fmin(fmax(src0, -1.0), 1.0)"))
1362 # components are sign-extended to 32-bits, and a dot-product is performed on
1363 # the resulting vectors.  src2 is added to the result of the dot-product.
1394 # src2 is an int32.  The 8-bit components are extended to 32-bits, and a
1395 # dot-product is performed on the resulting vectors.  src2 is added to the
1396 # result of the dot-product.
1399 # and source 1 mean that this opcode is not 2-source commutative
1414 # Like sdot_4x8_iadd, but the result is clamped to the range [-0x80000000, 0x7fffffff].
1448 # Like sudot_4x8_iadd, but the result is clamped to the range [-0x80000000, 0x7fffffff].
1451 # and source 1 mean that this opcode is not 2-source commutative
1469 # components are sign-extended to 32-bits, and a dot-product is performed on
1470 # the resulting vectors.  src2 is added to the result of the dot-product.
1492 # Like sdot_2x16_iadd, but the result is clamped to the range [-0x80000000, 0x7fffffff].