Lines Matching +full:vc4 +full:- +full:rules
27 # The Intrinsic class corresponds one-to-one with nir_intrinsic_info
49 - name: the intrinsic name
50 - src_components: list of the number of components per src, 0 means
53 - dest_components: number of destination components, -1 means no
56 - indices: list of constant indices
57 - flags: list of semantic flags
58 - sysval: is this a system-value intrinsic
59 - bit_sizes: allowed dest bit_sizes or the source it must match
90 self.bit_size_src = bit_sizes[1] if isinstance(bit_sizes, tuple) else -1
118 def intrinsic(name, src_comp=[], dest_comp=-1, indices=[],
135 # The stream-id for GS emit_vertex/end_primitive intrinsics.
138 # The clip-plane id for load_user_clip_plane intrinsic.
181 # Non-zero if we are accessing an array image
189 # not set at the intrinsic if the NIR was created from SPIR-V.
205 # (X - align_offset) % align_mul == 0
282 # Whether to sign-extend offsets in address arithmetic (else zero extend)
302 # The bit-size of each channel; must be one of 1, 8, 16, 32, or 64
308 # On a register load, floating-point absolute value/negate loaded value.
312 # On a register store, floating-point saturate the stored value.
333 intrinsic("load_deref", dest_comp=0, src_comp=[-1],
335 intrinsic("store_deref", src_comp=[-1, 0], indices=[WRITE_MASK, ACCESS])
336 intrinsic("copy_deref", src_comp=[-1, -1], indices=[DST_ACCESS, SRC_ACCESS])
337 intrinsic("memcpy_deref", src_comp=[-1, -1, 1], indices=[DST_ACCESS, SRC_ACCESS])
340 # logically def-use list of a register is given by the use list of this handle.
342 # itself is always a 32-bit scalar.
382 intrinsic("deref_buffer_array_length", src_comp=[-1], dest_comp=1,
387 intrinsic("get_ssbo_size", src_comp=[-1], dest_comp=1, bit_sizes=[32],
389 intrinsic("get_ubo_size", src_comp=[-1], dest_comp=1,
392 # Intrinsics which provide a run-time mode-check. Unlike the compile-time
394 intrinsic("deref_mode_is", src_comp=[-1], dest_comp=1,
396 intrinsic("addr_mode_is", src_comp=[-1], dest_comp=1,
421 # SpvOpTerminateInvocation from SPIR-V. Essentially a discard "for real".
424 # Control/Memory barrier with explicit scope. Follows the semantics of SPIR-V
461 # Additional SPIR-V ballot intrinsics
463 # These correspond to the SPIR-V opcodes
487 # Ballot ALU operations from SPIR-V.
499 # Shuffle operations from SPIR-V.
505 # Quad operations from SPIR-V.
511 # Similar to vote_any and vote_all, but per-quad instead of per-wavefront.
517 # Rotate operation from SPIR-V: SpvOpGroupNonUniformRotateKHR.
542 # emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single
545 # end_primitive implements GLSL's EndPrimitive() built-in.
569 # Rules:
570 # - This is a terminating instruction.
571 # - May only occur in workgroup-uniform control flow.
572 # - Dispatch sizes may be divergent (in which case the values
575 # - BASE: address of the task_payload variable used.
576 # - RANGE: size of the task_payload variable used.
582 # Same rules as launch_mesh_workgroups apply here as well.
584 intrinsic("launch_mesh_workgroups_with_payload_deref", src_comp=[3, -1], indices=[])
600 intrinsic("trace_ray", src_comp=[-1, 1, 1, 1, 1, 1, 3, 1, 3, 1, -1])
604 intrinsic("accept_ray_intersection") # Not in SPIR-V; useful for lowering
607 intrinsic("execute_callable", src_comp=[1, -1])
619 intrinsic("rq_initialize", src_comp=[-1, -1, 1, 1, 3, 1, 3, 1])
621 intrinsic("rq_terminate", src_comp=[-1])
623 intrinsic("rq_proceed", src_comp=[-1], dest_comp=1)
625 intrinsic("rq_generate_intersection", src_comp=[-1, 1])
627 intrinsic("rq_confirm_intersection", src_comp=[-1])
629 intrinsic("rq_load", src_comp=[-1], dest_comp=0, indices=[RAY_QUERY_VALUE,COMMITTED,COLUMN])
644 intrinsic("rt_execute_callable", src_comp=[1, -1], indices=[CALL_IDX,STACK_SIZE])
648 intrinsic("rt_trace_ray", src_comp=[-1, 1, 1, 1, 1, 1, 3, 1, 3, 1, -1],
659 # The register offset may be non-constant but must by dynamically uniform
664 intrinsic(name + "_deref", src_comp=[-1], dest_comp=1, flags=flags)
668 intrinsic(name + "_deref", src_comp=[-1, 1], dest_comp=1)
672 intrinsic(name + "_deref", src_comp=[-1, 1, 1], dest_comp=1)
700 # All image intrinsics take a four-coordinate vector and a sample index as
703 # in use are undefined. Image store takes an additional four-component
711 intrinsic("image_deref_" + name, src_comp=[-1] + src_comp,
715 intrinsic("bindless_image_" + name, src_comp=[-1] + src_comp,
729 # Non-uniform access is not lowered for image_descriptor_amd.
732 # CL-specific format queries
763 intrinsic("load_vulkan_descriptor", src_comp=[-1], dest_comp=0,
783 # 0: The SSBO buffer index (dynamically uniform in GLSL, possibly non-uniform
810 # has the low 32-bit and component Y has the high 32-bit.
815 # AGX global variants take a 64-bit base address plus a 32-bit offset in words.
816 # The offset is sign-extended or zero-extended based on the SIGN_EXTEND index.
818 intrinsic("deref_atomic", src_comp=[-1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
819 intrinsic("ssbo_atomic", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
828 intrinsic("deref_atomic_swap", src_comp=[-1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
829 intrinsic("ssbo_atomic_swap", src_comp=[-1, 1, 1, 1], dest_comp=1, indices=[ACCESS, ATOMIC_OP])
844 # 16-bit integer vec2 of the pixel X/Y in the framebuffer.
862 # sample_id_no_per_sample is like sample_id but does not imply per-
866 # sample_pos_or_center is like sample_pos but does not imply per-sample
867 # shading. When per-sample dispatch is not enabled, it returns (0.5, 0.5).
883 # non-zero_base indicates the base is included
904 # ---------------- --------------------------------
971 # Driver-specific viewport scale/offset parameters.
973 # VC4 and V3D need to emit a scaled version of the position in the vertex
1002 # shader prolog to handle two-sided color without recompiles and therefore
1021 # The first four are for the simple cases: pixel, centroid, per-sample
1117 load("ubo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET, RANGE_BASE, RANGE], flags=[CAN_ELIMINATE, CA…
1119 load("ubo_vec4", [-1, 1], [ACCESS, BASE, COMPONENT], flags=[CAN_ELIMINATE, CAN_REORDER])
1130 load("ssbo", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
1181 store("ssbo", [-1, 1], [WRITE_MASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
1237 # A 64-bit bitfield indexed by I/O location storing 1 in bits corresponding to
1249 # ordering follows SPIR-V operation.
1255 intrinsic("cmat_construct", src_comp=[-1, 1])
1256 intrinsic("cmat_load", src_comp=[-1, -1, 1], indices=[MATRIX_LAYOUT])
1257 intrinsic("cmat_store", src_comp=[-1, -1, 1], indices=[MATRIX_LAYOUT])
1259 intrinsic("cmat_muladd", src_comp=[-1, -1, -1, -1], indices=[SATURATE, CMAT_SIGNED_MASK])
1260 intrinsic("cmat_unary_op", src_comp=[-1, -1], indices=[ALU_OP])
1261 intrinsic("cmat_binary_op", src_comp=[-1, -1, -1], indices=[ALU_OP])
1262 intrinsic("cmat_scalar_op", src_comp=[-1, -1, -1], indices=[ALU_OP])
1263 intrinsic("cmat_bitcast", src_comp=[-1, -1])
1264 intrinsic("cmat_extract", src_comp=[-1, 1], dest_comp=1)
1265 intrinsic("cmat_insert", src_comp=[-1, 1, -1, 1])
1266 intrinsic("cmat_copy", src_comp=[-1, -1])
1268 # IR3-specific version of most SSBO intrinsics. The only different
1270 # the dword-offset, which is needed by the backend code apart from
1271 # the byte-offset already provided by NIR in one of the sources.
1275 # dword-offset always in the last source.
1308 # IR3-specific intrinsics for tessellation control shaders. cond_end_ir3 end
1316 # Per-view gl_FragSizeEXT and gl_FragCoord offset.
1322 # IR3-specific load/store intrinsics. These access a buffer used to pass data
1323 # between geometry stages - perhaps it's explicit access to the vertex cache.
1330 # IR3-specific load/store global intrinsics. They take a 64-bit base address
1331 # and a 32-bit offset. The hardware will add the base and the offset, which
1332 # saves us from doing 64-bit math on the base address.
1342 # IR3-specific bindless handle specifier. Similar to vulkan_resource_index, but
1348 # IR3-specific intrinsics for shader preamble. These are meant to be used like
1365 # IR3-specific intrinsic for stc. Should be used in the shader preamble.
1368 # IR3-specific intrinsic for ldc.k. Copies UBO to constant file.
1373 # IR3-specific intrinsic for ldg.k.
1378 # IR3-specific intrinsic for stsc. Loads from push consts to constant file
1400 # an sRGB->linear conversion, but linear values should be written to
1401 # raw_output_pan and the hardware handles linear->sRGB.
1404 # single 128-bit chunk. To support multisampling, the BASE index specifies the
1424 # Loads the sample position array on Bifrost, in a packed Arm-specific format
1427 # In a fragment shader, is the framebuffer single-sampled? 0/~0 bool
1438 # load as many components as needed giving per-component addresses
1473 # src[] = { address, unsigned 32-bit offset }.
1475 # src[] = { value, address, unsigned 32-bit offset }.
1490 # Descriptor where ES outputs are stored for GS to read on GFX6-8
1516 # Vertex offsets used for GS per-vertex inputs
1521 # Descriptor where GS outputs are stored for GS copy shader to read on GFX6-9
1536 # subgroup uniform and bits 0-7 must be less than or equal to the wave size.
1545 # For NGG passthrough mode only. Pre-packed argument for export_primitive_amd.
1553 # Merged wave info. Bits 0-7 are the ES thread count, 8-15 are the GS thread count, 16-24 is the
1554 # GS Wave ID, 24-27 is the wave index in the workgroup, and 28-31 is the workgroup size in waves.
1585 # 2. BVH node(64-bit pointer as 2x32 ...)
1641 # src[] = { 32/64-bit base address, 32-bit offset }.
1652 # Vertex stride in LS-HS buffer
1655 # Vertex stride in ES-GS buffer
1731 # V3D-specific intrinsic for tile buffer color reads.
1740 # V3D-specific intrinsic for per-sample tile buffer color writes.
1742 # The driver backend needs to identify per-sample color writes and emit
1749 # V3D-specific intrinsic to load the number of layers attached to
1754 # Equivalent to popcount(ballot(true) & ((1 << subgroup_invocation) - 1))
1757 # With [0, 1] clipping, no transform is needed on the output z' = z. But with [-1,
1760 # for [-1, 1].
1795 # This system value facilitates that. 16-bit 0/~0 bool allows easy masking.
1801 # converting between floating-point registers and normalized memory formats.
1832 # targets within the end-of-tile shader, although it is valid in general compute
1840 # non-multisampled images. It must be 2D or MS.
1852 # determines whether sign- or zero-extension is used for the index.
1878 # Load a driver-internal system value from a given system value set at a given
1893 # per-sample discard, or an inverted accumulating gl_SampleMask write. The
1899 # load the 32-bit stipple pattern for that row.
1903 # The fixed-function sample mask specified in the API (e.g. glSampleMask)
1906 # Loads the sample position array as fixed point packed into a 32-bit word
1909 # Loads the fixed-function glPointSize() value
1966 # Intel-specific query for loading from the isl_image_param struct passed
1975 # Intrinsic to load a block of at least 32B of constant data from a 64-bit
1976 # global memory address. The memory address must be uniform and 32B-aligned.
1986 # Load a relocatable 32-bit value
2006 # 64-bit global address for a Vulkan descriptor set
2017 intrinsic("load_deref_block_intel", dest_comp=0, src_comp=[-1],
2019 intrinsic("store_deref_block_intel", src_comp=[-1, 0], indices=[WRITE_MASK, ACCESS])
2025 load("ssbo_block_intel", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
2034 store("ssbo_block_intel", [-1, 1], [WRITE_MASK, ACCESS, ALIGN_MUL, ALIGN_OFFSET])
2046 load("ubo_uniform_block_intel", [-1, 1],
2052 load("ssbo_uniform_block_intel", [-1, 1], [ACCESS, ALIGN_MUL, ALIGN_OFFSET], [CAN_ELIMINATE])
2075 # Intel-specific ray-tracing intrinsic
2079 # System values used for ray-tracing on Intel
2109 # NVIDIA-specific intrinsics
2132 # NVIDIA-specific Geometry Shader intrinsics.
2147 # NVIDIA-specific system values
2182 intrinsic("initialize_node_payloads", src_comp=[-1, 1, 1], indices=[EXECUTION_SCOPE])
2185 intrinsic("enqueue_node_payloads", src_comp=[-1])
2188 intrinsic("finalize_incoming_node_payload", src_comp=[-1], dest_comp=1)