• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2024 Valve Corporation
3  * Copyright 2023 Alyssa Rosenzweig
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #pragma once
8 
9 /*
10  * This header adds definitions that are common between the CPU and the GPU for
11  * shared headers. It also fills in basic standard library holes for internal
12  * OpenCL.
13  */
14 
15 #ifndef __OPENCL_VERSION__
16 
17 /* The OpenCL version of this header defines many OpenCL versions of stdint.h
18  * and util/macros.h functions. #include both here for consistency in shared
19  * headers.
20  */
21 #include <stdint.h>
22 #include "util/macros.h"
23 
24 /* Structures defined in common host/device headers that include device pointers
25  * need to resolve to a real pointer in OpenCL but an opaque 64-bit address on
26  * the host. The DEVICE macro facilitates that.
27  */
28 #define DEVICE(type_) uint64_t
29 
30 /* However, inline functions defined in common host/device headers that take
31  * pointers need to resolve to pointers on either host or device. (Host pointers
32  * on the host, device pointers on the device.) This would be automatic with
33  * OpenCL generic pointers, but those can cause headaches and lose constantness,
34  * so these defines allow GLOBAL/CONST keywords to be used even in CPU code.
35  * Annoyingly, we can't use global/constant here because it conflicts with C++
36  * standard library headers.
37  */
38 #define GLOBAL
39 #define CONST const
40 
41 #else
42 
43 /* GenXML likes to use fp16. Since fp16 is supported by all grown up drivers, we
44  * just enable the extension everywhere.
45  */
46 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
47 
48 /* The OpenCL side of DEVICE must resolve to real pointer types, unlike
49  * the host version.
50  */
51 #define DEVICE(type_)   global type_ *
52 
53 /* Passthrough */
54 #define GLOBAL global
55 #define CONST constant
56 
57 /* OpenCL lacks explicitly sized integer types, but we know the sizes of
58  * particular integer types. These typedefs allow defining common headers with
59  * explicit integer types (and therefore compatible data layouts).
60  */
61 typedef ulong uint64_t;
62 typedef uint uint32_t;
63 typedef ushort uint16_t;
64 typedef uchar uint8_t;
65 
66 typedef long int64_t;
67 typedef int int32_t;
68 typedef short int16_t;
69 typedef char int8_t;
70 
71 /* OpenCL C lacks static_assert, a part of C11. This makes static_assert
72  * available on both host and device. It is defined as variadic to handle also
73  * no-message static_asserts (standardized in C23).
74  */
75 #define _S(x) #x
76 #define _PASTE_(x, y) x##y
77 #define _PASTE(x, y) _PASTE_(x, y)
78 #define static_assert(_COND, ...)                                              \
79    typedef char _PASTE(static_assertion, __LINE__)[(_COND) ? 1 : -1]
80 
81 /* NIR's precompilation infrastructure requires specifying a workgroup size with
82  * the kernel, via reqd_work_group_size. Unfortunately, reqd_work_group_size has
83  * terrible ergonomics, so we provide these aliases instead.
84  */
85 #define KERNEL3D(x, y, z)                                                      \
86    __attribute__((reqd_work_group_size(x, y, z))) kernel void
87 
88 #define KERNEL2D(x, y)   KERNEL3D(x, y, 1)
89 #define KERNEL(x)        KERNEL2D(x, 1)
90 
91 /* stddef.h usually defines this. We don't have that on the OpenCL side but we
92  * can use the builtin.
93  */
94 #define offsetof(x, y) __builtin_offsetof(x, y)
95 
96 /* This is not an exact match for the util/macros.h version but without the
97  * aligned(4) we get garbage code gen and in practice this is what you want.
98  */
99 #define PACKED __attribute__((packed, aligned(4)))
100 
101 /* OpenCL C doesn't seem to have an equivalent for this but it doesn't matter.
102  * Compare util/macros.h
103  */
104 #define ENUM_PACKED
105 
106 /* FILE * pointers can be useful in function signatures shared across
107  * host/device, but are meaningless in OpenCL. Turn them into void* to allow
108  * consistent prototype across host/device even though there won't be an actual
109  * file pointer on the device side.
110  */
111 #define FILE void
112 
/* OpenCL C lacks a standard memcpy, but clang has one that will be plumbed into
 * a NIR memcpy intrinsic. This is not a competent implementation of memcpy for
 * large amounts of data, since it's necessarily single threaded, but memcpy is
 * so useful for shared CPU/GPU code that it's worth making the standard
 * library function work.
 */
119 #define memcpy __builtin_memcpy
120 
/* OpenCL C has no standard abort(), so forward it to the NIR abort intrinsic. */
void nir_printf_abort(void);

static inline void
abort(void)
{
   nir_printf_abort();
}
124 
/* OpenCL C lacks a standard assert. We implement one on top of the NIR abort
 * intrinsic. We are careful to use a single printf so the lines don't get split
 * up if multiple threads assert in parallel.
 *
 * The expansion is wrapped in do { } while (0) so that assert(x); behaves as
 * exactly one statement. A bare if-block followed by the caller's semicolon
 * breaks constructs such as "if (c) assert(x); else ...". For the same reason
 * the NDEBUG variant expands to a void expression rather than to nothing.
 */
#ifndef NDEBUG
#define _ASSERT_STRING(x) _ASSERT_STRING_INNER(x)
#define _ASSERT_STRING_INNER(x) #x
#define assert(x) do { \
   if (!(x)) { \
      printf("Shader assertion fail at " __FILE__ ":" \
             _ASSERT_STRING(__LINE__) "\nExpected " #x "\n\n"); \
      nir_printf_abort(); \
   } \
} while (0)
#else
#define assert(x) ((void)0)
#endif
140 
/* Core OpenCL C lacks likely/unlikely. We might be able to map these to a
 * clang builtin though...
 */
144 #define likely(x) (x)
145 #define unlikely(x) (x)
146 
147 /* These duplicate the C standard library and are required for the
148  * u_intN_min/max implementations.
149  */
150 #define UINT64_MAX 18446744073709551615ul
151 #define INT64_MAX 9223372036854775807l
152 
/* These duplicate util/macros.h. This could maybe be cleaned up.
 *
 * BITFIELD_BIT(b):  32-bit value with only bit b set.
 * BITFIELD_MASK(m): 32-bit value with the low m bits set. m == 32 is handled
 *                   separately so the shift count never reaches the type
 *                   width.
 *
 * Macro arguments are parenthesized so that expressions using operators of
 * lower precedence than << (e.g. "a | b") expand as intended.
 */
#define BITFIELD_BIT(b)  (1u << (b))
#define BITFIELD_MASK(m) (((m) == 32) ? 0xffffffff : ((1u << (m)) - 1))
#define ASSERTED
#define ALWAYS_INLINE
#define UNUSED
159 
/* Returns INT64_MAX shifted right by the number of bits a 64-bit integer has
 * in excess of bit_size, i.e. the largest value of a signed bit_size-bit
 * integer. bit_size must be in the range [1, 64].
 */
static inline int64_t
u_intN_max(unsigned bit_size)
{
   assert(bit_size <= 64 && bit_size > 0);

   const unsigned unused_bits = 64 - bit_size;
   return INT64_MAX >> unused_bits;
}
166 
/* Returns the negation of u_intN_max(bit_size), minus one. The range check on
 * bit_size is performed by u_intN_max.
 */
static inline int64_t
u_intN_min(unsigned bit_size)
{
   const int64_t max = u_intN_max(bit_size);
   return -max - 1;
}
172 
/* Returns UINT64_MAX shifted right by the number of bits a 64-bit integer has
 * in excess of bit_size, i.e. the largest value of an unsigned bit_size-bit
 * integer. bit_size must be in the range [1, 64].
 */
static inline uint64_t
u_uintN_max(unsigned bit_size)
{
   assert(bit_size <= 64 && bit_size > 0);

   const unsigned unused_bits = 64 - bit_size;
   return UINT64_MAX >> unused_bits;
}
179 
/* Rounds x up to a multiple of y using a mask built from y - 1. The masking
 * only yields a multiple of y when y is a power of two.
 */
static inline uint
align(uint x, uint y)
{
   const uint mask = y - 1;

   return (x + mask) & ~mask;
}
185 
/* Returns 31 minus the leading-zero count of (n | 1). Setting bit 0 first
 * means clz() is never handed a zero value.
 */
static inline uint32_t
util_logbase2(uint32_t n)
{
   const uint32_t nonzero = n | 1;

   return 31 - clz(nonzero);
}
191 
/* Returns 0 for n <= 1; otherwise 32 minus the leading-zero count of n - 1. */
static inline uint32_t
util_logbase2_ceil(uint32_t n)
{
   if (n <= 1)
      return 0;

   return 32 - clz(n - 1);
}
197 
/* Small arithmetic helper macros.
 *
 * BITFIELD64_MASK(x): 64-bit value with the low x bits set. x == 64 is handled
 *                     separately so the shift count never reaches the type
 *                     width. The argument is parenthesized so expressions
 *                     such as "a | b" expand as intended.
 *
 * All of these may evaluate their arguments more than once, so do not pass
 * expressions with side effects.
 */
#define BITFIELD64_MASK(x) (((x) == 64) ? ~0ul : ((1ul << (x)) - 1))
#define IS_POT(v)          (((v) & ((v) - 1)) == 0)
#define IS_POT_NONZERO(v)  ((v) != 0 && IS_POT(v))
#define DIV_ROUND_UP(A, B)      (((A) + (B) - 1) / (B))
#define CLAMP(X, MIN, MAX)      ((X) > (MIN) ? ((X) > (MAX) ? (MAX) : (X)) : (MIN))
#define ALIGN_POT(x, pot_align) (((x) + (pot_align) - 1) & ~((pot_align) - 1))
#define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
#define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
206 
/* Reinterprets a float as a 32-bit unsigned integer via as_uint(). */
static inline uint32_t
fui(float f)
{
   const uint32_t bits = as_uint(f);

   return bits;
}
212 
/* Reinterprets a 32-bit unsigned integer as a float via as_float(). */
static inline float
uif(uint32_t ui)
{
   const float value = as_float(ui);

   return value;
}
218 
#define CL_FLT_EPSILON 1.1920928955078125e-7f

/* OpenCL C provides neither roundf nor llroundf, so we emulate them. */
static inline float
roundf(float x)
{
   /* Slightly less than one half, carrying the sign of x. */
   const float bias = copysign(0.5f - 0.25f * CL_FLT_EPSILON, x);

   return trunc(x + bias);
}
226 
/* Converts the result of the emulated roundf() to long long. */
static inline long long
llroundf(float x)
{
   return (long long)roundf(x);
}
231 
232 static inline uint16_t
_mesa_float_to_half(float f)233 _mesa_float_to_half(float f)
234 {
235    return as_ushort(convert_half(f));
236 }
237 
238 static inline float
_mesa_half_to_float(uint16_t w)239 _mesa_half_to_float(uint16_t w)
240 {
241    return convert_float(as_half(w));
242 }
243 
/* Duplicates u_math.h. We should make that header CL safe at some point...
 *
 * Sign-extends the low `width` bits of val to a full 64-bit signed value: the
 * field is shifted up to the top of the word and then shifted back down
 * through a signed type. Bits of val above `width` are discarded.
 *
 * width must be in the range [1, 64]. Anything else makes the shift count
 * reach or exceed the type width, so it is rejected with an assert in the same
 * way as the u_intN_max/u_uintN_max helpers.
 */
static inline int64_t
util_sign_extend(uint64_t val, unsigned width)
{
   assert(width > 0 && width <= 64);

   unsigned shift = 64 - width;
   return (int64_t)(val << shift) >> shift;
}
252 
253 #endif
254