/* * Copyright 2024 Valve Corporation * Copyright 2023 Alyssa Rosenzweig * SPDX-License-Identifier: MIT */ #pragma once /* * This header adds definitions that are common between the CPU and the GPU for * shared headers. It also fills in basic standard library holes for internal * OpenCL. */ #ifndef __OPENCL_VERSION__ /* The OpenCL version of this header defines many OpenCL versions of stdint.h * and util/macros.h functions. #include both here for consistency in shared * headers. */ #include #include "util/macros.h" /* Structures defined in common host/device headers that include device pointers * need to resolve to a real pointer in OpenCL but an opaque 64-bit address on * the host. The DEVICE macro facilitates that. */ #define DEVICE(type_) uint64_t /* However, inline functions defined in common host/device headers that take * pointers need to resolve to pointers on either host or device. (Host pointers * on the host, device pointers on the device.) This would be automatic with * OpenCL generic pointers, but those can cause headaches and lose constantness, * so these defines allow GLOBAL/CONST keywords to be used even in CPU code. * Annoyingly, we can't use global/constant here because it conflicts with C++ * standard library headers. */ #define GLOBAL #define CONST const #else /* GenXML likes to use fp16. Since fp16 is supported by all grown up drivers, we * just enable the extension everywhere. */ #pragma OPENCL EXTENSION cl_khr_fp16 : enable /* The OpenCL side of DEVICE must resolve to real pointer types, unlike * the host version. */ #define DEVICE(type_) global type_ * /* Passthrough */ #define GLOBAL global #define CONST constant /* OpenCL lacks explicitly sized integer types, but we know the sizes of * particular integer types. These typedefs allow defining common headers with * explicit integer types (and therefore compatible data layouts). */ typedef ulong uint64_t; typedef uint uint32_t; typedef ushort uint16_t; typedef uchar uint8_t; typedef long int64_t; typedef int int32_t; typedef short int16_t; typedef char int8_t; /* OpenCL C lacks static_assert, a part of C11. This makes static_assert * available on both host and device. It is defined as variadic to handle also * no-message static_asserts (standardized in C23). */ #define _S(x) #x #define _PASTE_(x, y) x##y #define _PASTE(x, y) _PASTE_(x, y) #define static_assert(_COND, ...) \ typedef char _PASTE(static_assertion, __LINE__)[(_COND) ? 1 : -1] /* NIR's precompilation infrastructure requires specifying a workgroup size with * the kernel, via reqd_work_group_size. Unfortunately, reqd_work_group_size has * terrible ergonomics, so we provide these aliases instead. */ #define KERNEL3D(x, y, z) \ __attribute__((reqd_work_group_size(x, y, z))) kernel void #define KERNEL2D(x, y) KERNEL3D(x, y, 1) #define KERNEL(x) KERNEL2D(x, 1) /* stddef.h usually defines this. We don't have that on the OpenCL side but we * can use the builtin. */ #define offsetof(x, y) __builtin_offsetof(x, y) /* This is not an exact match for the util/macros.h version but without the * aligned(4) we get garbage code gen and in practice this is what you want. */ #define PACKED __attribute__((packed, aligned(4))) /* OpenCL C doesn't seem to have an equivalent for this but it doesn't matter. * Compare util/macros.h */ #define ENUM_PACKED /* FILE * pointers can be useful in function signatures shared across * host/device, but are meaningless in OpenCL. Turn them into void* to allow * consistent prototype across host/device even though there won't be an actual * file pointer on the device side. */ #define FILE void /* OpenCL C lacks a standard memcpy, but clang has one that will be plumbed into * a NIR memcpy intrinsic. This is not a competent implementation of memcpy for * large amounts of data, since it's necessarily single threaded, but memcpy is * too useful for shared CPU/GPU code that it's worth making the standard * library function work. */ #define memcpy __builtin_memcpy /* OpenCL C lacks a standard abort, so we plumb through the NIR intrinsic. */ void nir_printf_abort(void); static inline void abort(void) { nir_printf_abort(); } /* OpenCL C lacks a standard assert. We implement one on top of abort. We are * careful to use a single printf so the lines don't get split up if multiple * threads assert in parallel. */ #ifndef NDEBUG #define _ASSERT_STRING(x) _ASSERT_STRING_INNER(x) #define _ASSERT_STRING_INNER(x) #x #define assert(x) if (!(x)) { \ printf("Shader assertion fail at " __FILE__ ":" \ _ASSERT_STRING(__LINE__) "\nExpected " #x "\n\n"); \ nir_printf_abort(); \ } #else #define assert(x) #endif /* Core OpenCL C like likely/unlikely. We might be able to map to a clang built * in though... */ #define likely(x) (x) #define unlikely(x) (x) /* These duplicate the C standard library and are required for the * u_intN_min/max implementations. */ #define UINT64_MAX 18446744073709551615ul #define INT64_MAX 9223372036854775807l /* These duplicate util/macros.h. This could maybe be cleaned up */ #define BITFIELD_BIT(b) (1u << b) #define BITFIELD_MASK(m) (((m) == 32) ? 0xffffffff : ((1u << (m)) - 1)) #define ASSERTED #define ALWAYS_INLINE #define UNUSED static inline int64_t u_intN_max(unsigned bit_size) { assert(bit_size <= 64 && bit_size > 0); return INT64_MAX >> (64 - bit_size); } static inline int64_t u_intN_min(unsigned bit_size) { return (-u_intN_max(bit_size)) - 1; } static inline uint64_t u_uintN_max(unsigned bit_size) { assert(bit_size <= 64 && bit_size > 0); return UINT64_MAX >> (64 - bit_size); } static inline uint align(uint x, uint y) { return (x + y - 1) & ~(y - 1); } static inline uint32_t util_logbase2(uint32_t n) { return (31 - clz(n | 1)); } static inline uint32_t util_logbase2_ceil(uint32_t n) { return (n <= 1) ? 0 : 32 - clz(n - 1); } #define BITFIELD64_MASK(x) ((x == 64) ? ~0ul : ((1ul << x) - 1)) #define IS_POT(v) (((v) & ((v) - 1)) == 0) #define IS_POT_NONZERO(v) ((v) != 0 && IS_POT(v)) #define DIV_ROUND_UP(A, B) (((A) + (B) - 1) / (B)) #define CLAMP(X, MIN, MAX) ((X) > (MIN) ? ((X) > (MAX) ? (MAX) : (X)) : (MIN)) #define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1)) #define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) #define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) static inline uint32_t fui(float f) { return as_uint(f); } static inline float uif(uint32_t ui) { return as_float(ui); } #define CL_FLT_EPSILON 1.1920928955078125e-7f /* OpenCL C lacks roundf and llroundf, we can emulate it */ static inline float roundf(float x) { return trunc(x + copysign(0.5f - 0.25f * CL_FLT_EPSILON, x)); } static inline long long llroundf(float x) { return roundf(x); } static inline uint16_t _mesa_float_to_half(float f) { return as_ushort(convert_half(f)); } static inline float _mesa_half_to_float(uint16_t w) { return convert_float(as_half(w)); } /* Duplicates u_math.h. We should make that header CL safe at some point... */ static inline int64_t util_sign_extend(uint64_t val, unsigned width) { unsigned shift = 64 - width; return (int64_t)(val << shift) >> shift; } #endif