#version 450 core

// Fragment shader exercising 16-bit floating-point types (float16_t,
// f16vecN, f16matNxM) across literals, block layouts, specialization
// constants, operators, conversions and built-in functions.

#extension GL_AMD_gpu_shader_half_float: enable
#extension GL_ARB_gpu_shader_int64: enable

// Entry point; intentionally empty. The functions below are never called
// from here.
void main()
{
}

// Half float literals: both the lowercase "hf" and uppercase "HF" suffixes.
void literal()
{
    const float16_t f16c  = 0.000001hf;
    const f16vec2   f16cv = f16vec2(-0.25HF, 0.03HF);

    f16vec2 f16v;
    f16v.x  = f16c;
    f16v   += f16cv;
}

// Block memory layout
struct S
{
    float16_t  x;   // rule 1: align = 2, takes offsets 0-1
    f16vec2    y;   // rule 2: align = 4, takes offsets 4-7
    f16vec3    z;   // rule 3: align = 8, takes offsets 8-13
};

layout(column_major, std140) uniform B1
{
    float16_t  a;     // rule 1:  align = 2, takes offsets 0-1
    f16vec2    b;     // rule 2:  align = 4, takes offsets 4-7
    f16vec3    c;     // rule 3:  align = 8, takes offsets 8-13
    float16_t  d[2];  // rule 4:  align = 16, array stride = 16,
                      //          takes offsets 16-47
    f16mat2x3  e;     // rule 5:  align = 16, matrix stride = 16,
                      //          takes offsets 48-79
    f16mat2x3  f[2];  // rule 6:  align = 16, matrix stride = 16,
                      //          array stride = 32, f[0] takes
                      //          offsets 80-111, f[1] takes offsets
                      //          112-143
    S          g;     // rule 9:  align = 16, g.x takes offsets
                      //          144-145, g.y takes offsets 148-151,
                      //          g.z takes offsets 152-157
    S          h[2];  // rule 10: align = 16, array stride = 16, h[0]
                      //          takes offsets 160-175, h[1] takes
                      //          offsets 176-191
};

layout(row_major, std430) buffer B2
{
    float16_t  o;     // rule 1:  align = 2, takes offsets 0-1
    f16vec2    p;     // rule 2:  align = 4, takes offsets 4-7
    f16vec3    q;     // rule 3:  align = 8, takes offsets 8-13
    float16_t  r[2];  // rule 4:  align = 2, array stride = 2, takes
                      //          offsets 14-17
    f16mat2x3  s;     // rule 7:  align = 4, matrix stride = 4, takes
                      //          offsets 20-31
    f16mat2x3  t[2];  // rule 8:  align = 4, matrix stride = 4, array
                      //          stride = 12, t[0] takes offsets
                      //          32-43, t[1] takes offsets 44-55
    S          u;     // rule 9:  align = 8, u.x takes offsets
                      //          56-57, u.y takes offsets 60-63, u.z
                      //          takes offsets 64-69
    S          v[2];  // rule 10: align = 8, array stride = 16, v[0]
                      //          takes offsets 72-87, v[1] takes
                      //          offsets 88-103
};

// Specialization constant
layout(constant_id = 100) const float16_t sf16 = 0.125hf;
layout(constant_id = 101) const float     sf   = 0.25;
layout(constant_id = 102) const double    sd   = 0.5lf;

// Constants derived from the specialization constants above.
const float  f16_to_f = float(sf16);
// NOTE(review): this is built with float(), not double(), so the value is
// converted to float first and then widened by the initialization. Kept
// as-is; confirm whether double(sf16) was intended.
const double f16_to_d = float(sf16);

const float16_t f_to_f16 = float16_t(sf);
const float16_t d_to_f16 = float16_t(sd);

// Arithmetic, relational and vector/matrix operators on half-float operands.
void operators()
{
    float16_t f16;
    f16vec2   f16v;
    f16mat2x2 f16m;
    bool      b;

    // Arithmetic
    f16v += f16v;
    f16v -= f16v;
    f16v *= f16v;
    f16v /= f16v;
    f16v++;
    f16v--;
    ++f16m;
    --f16m;
    f16v = -f16v;
    f16m = -f16m;

    f16 = f16v.x + f16v.y;
    f16 = f16v.x - f16v.y;
    f16 = f16v.x * f16v.y;
    f16 = f16v.x / f16v.y;

    // Relational
    b = (f16v.x != f16);
    b = (f16v.y == f16);
    b = (f16v.x >  f16);
    b = (f16v.y <  f16);
    b = (f16v.x >= f16);
    b = (f16v.y <= f16);

    // Vector/matrix operations
    f16v = f16v * f16;
    f16m = f16m * f16;
    f16v = f16m * f16v;
    f16v = f16v * f16m;
    f16m = f16m * f16m;
}

// Constructor conversions between f16vec3 and every other 3-component
// vector type declared here, in both directions.
void typeCast()
{
    bvec3   bv;
    vec3    fv;
    dvec3   dv;
    ivec3   iv;
    uvec3   uv;
    i64vec3 i64v;
    u64vec3 u64v;

    f16vec3 f16v;

    f16v = f16vec3(bv);     // bool -> float16
    bv   = bvec3(f16v);     // float16 -> bool

    f16v = f16vec3(fv);     // float -> float16
    fv   = vec3(f16v);      // float16 -> float

    f16v = f16vec3(dv);     // double -> float16
    dv   = dvec3(f16v);     // float16 -> double

    f16v = f16vec3(iv);     // int -> float16
    iv   = ivec3(f16v);     // float16 -> int

    f16v = f16vec3(uv);     // uint -> float16
    uv   = uvec3(f16v);     // float16 -> uint

    f16v = f16vec3(i64v);   // int64 -> float16
    i64v = i64vec3(f16v);   // float16 -> int64

    f16v = f16vec3(u64v);   // uint64 -> float16
    u64v = u64vec3(f16v);   // float16 -> uint64
}

// Angle and trigonometry built-ins on f16vec4 operands.
void builtinAngleTrigFuncs()
{
    f16vec4 f16v1, f16v2;

    f16v2 = radians(f16v1);
    f16v2 = degrees(f16v1);
    f16v2 = sin(f16v1);
    f16v2 = cos(f16v1);
    f16v2 = tan(f16v1);
    f16v2 = asin(f16v1);
    f16v2 = acos(f16v1);
    f16v2 = atan(f16v1, f16v2);   // two-argument form
    f16v2 = atan(f16v1);          // one-argument form
    f16v2 = sinh(f16v1);
    f16v2 = cosh(f16v1);
    f16v2 = tanh(f16v1);
    f16v2 = asinh(f16v1);
    f16v2 = acosh(f16v1);
    f16v2 = atanh(f16v1);
}

// Exponential built-ins on f16vec2 operands.
void builtinExpFuncs()
{
    f16vec2 f16v1, f16v2;

    f16v2 = pow(f16v1, f16v2);
    f16v2 = exp(f16v1);
    f16v2 = log(f16v1);
    f16v2 = exp2(f16v1);
    f16v2 = log2(f16v1);
    f16v2 = sqrt(f16v1);
    f16v2 = inversesqrt(f16v1);
}

// Common built-ins, covering the vector/vector, vector/scalar and
// boolean-selector argument combinations used below.
void builtinCommonFuncs()
{
    f16vec3   f16v1, f16v2, f16v3;
    float16_t f16;
    bool      b;
    bvec3     bv;
    ivec3     iv;

    f16v2 = abs(f16v1);
    f16v2 = sign(f16v1);
    f16v2 = floor(f16v1);
    f16v2 = trunc(f16v1);
    f16v2 = round(f16v1);
    f16v2 = roundEven(f16v1);
    f16v2 = ceil(f16v1);
    f16v2 = fract(f16v1);
    f16v2 = mod(f16v1, f16v2);
    f16v2 = mod(f16v1, f16);
    f16v3 = modf(f16v1, f16v2);
    f16v3 = min(f16v1, f16v2);
    f16v3 = min(f16v1, f16);
    f16v3 = max(f16v1, f16v2);
    f16v3 = max(f16v1, f16);
    f16v3 = clamp(f16v1, f16, f16v2.x);
    f16v3 = clamp(f16v1, f16v2, f16vec3(f16));
    f16v3 = mix(f16v1, f16v2, f16);
    f16v3 = mix(f16v1, f16v2, f16v3);
    f16v3 = mix(f16v1, f16v2, bv);
    f16v3 = step(f16v1, f16v2);
    f16v3 = step(f16, f16v3);
    f16v3 = smoothstep(f16v1, f16v2, f16v3);
    f16v3 = smoothstep(f16, f16v1.x, f16v2);
    b     = isnan(f16);
    bv    = isinf(f16v1);
    f16v3 = fma(f16v1, f16v2, f16v3);
    f16v2 = frexp(f16v1, iv);
    f16v2 = ldexp(f16v1, iv);
}

// Pack an f16vec2 into a uint and unpack it again.
void builtinPackUnpackFuncs()
{
    uint    u;
    f16vec2 f16v;

    u    = packFloat2x16(f16v);
    f16v = unpackFloat2x16(u);
}

// Geometric built-ins on f16vec3 operands.
void builtinGeometryFuncs()
{
    float16_t f16;
    f16vec3   f16v1, f16v2, f16v3;

    f16   = length(f16v1);
    f16   = distance(f16v1, f16v2);
    f16   = dot(f16v1, f16v2);
    f16v3 = cross(f16v1, f16v2);
    f16v2 = normalize(f16v1);
    f16v3 = faceforward(f16v1, f16v2, f16v3);
    f16v3 = reflect(f16v1, f16v2);
    f16v3 = refract(f16v1, f16v2, f16);
}

// Matrix built-ins on half-float matrices.
void builtinMatrixFuncs()
{
    f16mat2x3 f16m1, f16m2, f16m3;
    f16mat3x2 f16m4;
    f16mat3   f16m5;
    f16mat4   f16m6, f16m7;

    f16vec3 f16v1;
    f16vec2 f16v2;

    float16_t f16;

    f16m3 = matrixCompMult(f16m1, f16m2);
    f16m1 = outerProduct(f16v1, f16v2);
    f16m4 = transpose(f16m1);
    f16   = determinant(f16m5);
    f16m6 = inverse(f16m7);
}

// Component-wise vector relational built-ins; each yields a bvec3.
void builtinVecRelFuncs()
{
    f16vec3 f16v1, f16v2;
    bvec3   bv;

    bv = lessThan(f16v1, f16v2);
    bv = lessThanEqual(f16v1, f16v2);
    bv = greaterThan(f16v1, f16v2);
    bv = greaterThanEqual(f16v1, f16v2);
    bv = equal(f16v1, f16v2);
    bv = notEqual(f16v1, f16v2);
}

// Half-float fragment input consumed by builtinFragProcFuncs().
in f16vec3 if16v;

// Fragment-processing built-ins (derivatives and interpolation) applied to
// the half-float input if16v at scalar, 2- and 3-component widths.
void builtinFragProcFuncs()
{
    f16vec3 f16v;

    // Derivative
    f16v.x  = dFdx(if16v.x);
    f16v.y  = dFdy(if16v.y);
    f16v.xy = dFdxFine(if16v.xy);
    f16v.xy = dFdyFine(if16v.xy);
    f16v    = dFdxCoarse(if16v);
    f16v    = dFdyCoarse(if16v);

    f16v.x  = fwidth(if16v.x);
    f16v.xy = fwidthFine(if16v.xy);
    f16v    = fwidthCoarse(if16v);

    // Interpolation
    f16v.x  = interpolateAtCentroid(if16v.x);
    f16v.xy = interpolateAtSample(if16v.xy, 1);
    f16v    = interpolateAtOffset(if16v, f16vec2(0.5hf));
}