// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "SamplerCore.hpp"

#include "Constants.hpp"
#include "PixelRoutine.hpp"
#include "System/Debug.hpp"
#include "Vulkan/VkSampler.hpp"

namespace sw {

SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler &state)
    : constants(constants)
    , state(state)
{
}
Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 uvwa[4], Float4 &dRef, Float &&lodOrBias, Float4 &dsx, Float4 &dsy, Vector4i &offset, Int4 &sample, SamplerFunction function)
{
	Vector4f c;

	Float4 u = uvwa[0];
	Float4 v = uvwa[1];
	Float4 w = uvwa[2];
	Float4 a;  // Array layer coordinate
	switch(state.textureType)
	{
	case VK_IMAGE_VIEW_TYPE_1D_ARRAY: a = uvwa[1]; break;
	case VK_IMAGE_VIEW_TYPE_2D_ARRAY: a = uvwa[2]; break;
	case VK_IMAGE_VIEW_TYPE_CUBE_ARRAY: a = uvwa[3]; break;
	default: break;
	}

	Float lod;
	Float anisotropy;
	Float4 uDelta;
	Float4 vDelta;
	Float4 M;  // Major axis

	if(state.isCube())
	{
		Int4 face = cubeFace(u, v, uvwa[0], uvwa[1], uvwa[2], M);
		w = As<Float4>(face);
	}

	// Determine if we can skip the LOD computation. This is the case when the mipmap has only one level, except for LOD query,
	// where we have to return the computed value. Anisotropic filtering requires computing the anisotropy factor even for a single mipmap level.
	bool singleMipLevel = (state.minLod == state.maxLod);
	bool requiresLodComputation = (function == Query) || (state.textureFilter == FILTER_ANISOTROPIC);
	bool skipLodComputation = singleMipLevel && !requiresLodComputation;

	if(skipLodComputation)
	{
		lod = state.minLod;
	}
	else if(function == Implicit || function == Bias || function == Grad || function == Query)
	{
		if(state.is1D())
		{
			computeLod1D(texture, lod, u, dsx, dsy, function);
		}
		else if(state.is2D())
		{
			computeLod2D(texture, lod, anisotropy, uDelta, vDelta, u, v, dsx, dsy, function);
		}
		else if(state.isCube())
		{
			computeLodCube(texture, lod, uvwa[0], uvwa[1], uvwa[2], dsx, dsy, M, function);
		}
		else
		{
			computeLod3D(texture, lod, u, v, w, dsx, dsy, function);
		}

		Float bias = state.mipLodBias;

		if(function == Bias)
		{
			// Add SPIR-V Bias operand to the sampler provided bias and clamp to maxSamplerLodBias limit.
			bias = Min(Max(bias + lodOrBias, -vk::MAX_SAMPLER_LOD_BIAS), vk::MAX_SAMPLER_LOD_BIAS);
		}

		lod += bias;
	}
	else if(function == Lod)
	{
		// Vulkan 1.1: "The absolute value of mipLodBias must be less than or equal to VkPhysicalDeviceLimits::maxSamplerLodBias"
		// Hence no explicit clamping to maxSamplerLodBias is required in this case.
		lod = lodOrBias + state.mipLodBias;
	}
	else if(function == Fetch)
	{
		// TODO: Eliminate int-float-int conversion.
		lod = Float(As<Int>(lodOrBias));
	}
	else if(function == Base || function == Gather)
	{
		lod = Float(0);
	}
	else
		UNREACHABLE("Sampler function %d", int(function));

	if(function != Base && function != Fetch && function != Gather)
	{
		if(function == Query)
		{
			c.y = Float4(lod);  // Unclamped LOD.
		}

		if(!skipLodComputation)
		{
			lod = Max(lod, state.minLod);
			lod = Min(lod, state.maxLod);
		}

		if(function == Query)
		{
			if(state.mipmapFilter == MIPMAP_POINT)
			{
				lod = Round(lod);  // TODO: Preferred formula is ceil(lod + 0.5) - 1
			}

			c.x = lod;
			// c.y contains unclamped LOD.

			return c;
		}
	}

	bool force32BitFiltering = state.highPrecisionFiltering && !isYcbcrFormat() && (state.textureFilter != FILTER_POINT);
	bool use32BitFiltering = hasFloatTexture() || hasUnnormalizedIntegerTexture() || force32BitFiltering ||
	                         state.isCube() || state.unnormalizedCoordinates || state.compareEnable ||
	                         borderModeActive() || (function == Gather) || (function == Fetch);
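	// Informal note on the conditions above: the 16-bit fixed-point path below
	// only handles normalized formats with straightforward filtering. Float and
	// unnormalized-integer formats, seamless cube filtering, unnormalized
	// coordinates, depth comparison, border addressing, and Gather/Fetch all
	// need exact texel values or float coordinates, so they take the 32-bit
	// float path instead.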
	const sw::float4 compScale = getComponentScale();
	int gatherComponent = (function == Gather) ? getGatherComponent() : 0;
	int numComponents = (function == Gather) ? 4 : textureComponentCount();

	if(use32BitFiltering)
	{
		c = sampleFloatFilter(texture, u, v, w, a, dRef, offset, sample, lod, anisotropy, uDelta, vDelta, function);

		if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable)
		{
			for(int component = 0; component < numComponents; component++)
			{
				c[component] *= Float4(1.0f / compScale[(function == Gather) ? gatherComponent : component]);
			}
		}
	}
	else  // 16-bit filtering.
	{
		Vector4s cs = sampleFilter(texture, u, v, w, a, offset, sample, lod, anisotropy, uDelta, vDelta, function);

		for(int component = 0; component < numComponents; component++)
		{
			if(hasUnsignedTextureComponent(component))
			{
				c[component] = Float4(As<UShort4>(cs[component]));
			}
			else
			{
				c[component] = Float4(cs[component]);
			}

			c[component] *= Float4(1.0f / compScale[(function == Gather) ? gatherComponent : component]);
		}
	}

	if(state.textureFormat.isSignedNormalized())
	{
		for(int component = 0; component < numComponents; component++)
		{
			c[component] = Max(c[component], Float4(-1.0f));
		}
	}

	if(state.textureFilter != FILTER_GATHER)
	{
		if((state.swizzle.r != VK_COMPONENT_SWIZZLE_R) ||
		   (state.swizzle.g != VK_COMPONENT_SWIZZLE_G) ||
		   (state.swizzle.b != VK_COMPONENT_SWIZZLE_B) ||
		   (state.swizzle.a != VK_COMPONENT_SWIZZLE_A))
		{
			const Vector4f col = c;
			bool integer = hasUnnormalizedIntegerTexture();
			c.x = applySwizzle(col, state.swizzle.r, integer);
			c.y = applySwizzle(col, state.swizzle.g, integer);
			c.z = applySwizzle(col, state.swizzle.b, integer);
			c.w = applySwizzle(col, state.swizzle.a, integer);
		}
	}
	else  // Gather
	{
		VkComponentSwizzle swizzle = gatherSwizzle();

		// R/G/B/A swizzles affect the component collected from each texel earlier.
		// Handle the ZERO and ONE cases here because we don't need to know the format.

		if(swizzle == VK_COMPONENT_SWIZZLE_ZERO)
		{
			c.x = c.y = c.z = c.w = Float4(0);
		}
		else if(swizzle == VK_COMPONENT_SWIZZLE_ONE)
		{
			bool integer = hasUnnormalizedIntegerTexture();
			c.x = c.y = c.z = c.w = integer ? As<Float4>(Int4(1)) : RValue<Float4>(Float4(1.0f));
		}
	}

	return c;
}

Float4 SamplerCore::applySwizzle(const Vector4f &c, VkComponentSwizzle swizzle, bool integer)
{
	switch(swizzle)
	{
	default: UNSUPPORTED("VkComponentSwizzle %d", (int)swizzle);
	case VK_COMPONENT_SWIZZLE_R: return c.x;
	case VK_COMPONENT_SWIZZLE_G: return c.y;
	case VK_COMPONENT_SWIZZLE_B: return c.z;
	case VK_COMPONENT_SWIZZLE_A: return c.w;
	case VK_COMPONENT_SWIZZLE_ZERO: return Float4(0.0f, 0.0f, 0.0f, 0.0f);
	case VK_COMPONENT_SWIZZLE_ONE:
		if(integer)
		{
			return Float4(As<Float4>(sw::Int4(1, 1, 1, 1)));
		}
		else
		{
			return Float4(1.0f, 1.0f, 1.0f, 1.0f);
		}
		break;
	}
}

Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod)
{
	Short4 offset = *Pointer<Short4>(mipmap + halfOffset);

	if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
	{
		offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f)));
	}
	else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
	{
		offset &= Short4(CmpLE(Float4(lod), Float4(0.0f)));
	}
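	// Informal note: `offset` holds the half-texel step (uHalf/vHalf) in the
	// same 16-bit fixed-point space as the coordinates. For the mixed min/mag
	// filters above it is masked to zero on the side that point-samples:
	// lod > 0 means minification, lod <= 0 magnification. E.g. with
	// FILTER_MIN_LINEAR_MAG_POINT and lod <= 0 the offset becomes zero, so the
	// four bilinear taps collapse onto the same texel.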

	if(wrap)
	{
		switch(count)
		{
		case -1: return uvw - offset;
		case 0: return uvw;
		case +1: return uvw + offset;
		case 2: return uvw + offset + offset;
		}
	}
	else  // Clamp or mirror
	{
		switch(count)
		{
		case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
		case 0: return uvw;
		case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
		case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
		}
	}

	return uvw;
}

Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Vector4i &offset, const Int4 &sample, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
{
	Vector4s c = sampleAniso(texture, u, v, w, a, offset, sample, lod, anisotropy, uDelta, vDelta, false, function);

	if(function == Fetch)
	{
		return c;
	}

	if(state.mipmapFilter == MIPMAP_LINEAR)
	{
		Vector4s cc = sampleAniso(texture, u, v, w, a, offset, sample, lod, anisotropy, uDelta, vDelta, true, function);

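		// Informal sketch of the fixed-point mip blend below: the fractional
		// part of `lod` becomes a 0.16 fixed-point weight after scaling by
		// 2^16 (the integer part falls off the low 16 bits). For example,
		// lod = 3.25 yields utri = 0x4000 (0.25) and stri = 0x2000 (0.25 in
		// signed 0.15 form). The second level is weighted by utri/stri, the
		// first by the complement, and signed results are doubled at the end
		// to compensate for the halved 0.15 weights.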
		lod *= Float(1 << 16);

		UShort4 utri = UShort4(Float4(lod));  // FIXME: Optimize
		Short4 stri = utri >> 1;              // FIXME: Optimize

		if(hasUnsignedTextureComponent(0))
			cc.x = MulHigh(As<UShort4>(cc.x), utri);
		else
			cc.x = MulHigh(cc.x, stri);
		if(hasUnsignedTextureComponent(1))
			cc.y = MulHigh(As<UShort4>(cc.y), utri);
		else
			cc.y = MulHigh(cc.y, stri);
		if(hasUnsignedTextureComponent(2))
			cc.z = MulHigh(As<UShort4>(cc.z), utri);
		else
			cc.z = MulHigh(cc.z, stri);
		if(hasUnsignedTextureComponent(3))
			cc.w = MulHigh(As<UShort4>(cc.w), utri);
		else
			cc.w = MulHigh(cc.w, stri);

		utri = ~utri;
		stri = Short4(0x7FFF) - stri;

		if(hasUnsignedTextureComponent(0))
			c.x = MulHigh(As<UShort4>(c.x), utri);
		else
			c.x = MulHigh(c.x, stri);
		if(hasUnsignedTextureComponent(1))
			c.y = MulHigh(As<UShort4>(c.y), utri);
		else
			c.y = MulHigh(c.y, stri);
		if(hasUnsignedTextureComponent(2))
			c.z = MulHigh(As<UShort4>(c.z), utri);
		else
			c.z = MulHigh(c.z, stri);
		if(hasUnsignedTextureComponent(3))
			c.w = MulHigh(As<UShort4>(c.w), utri);
		else
			c.w = MulHigh(c.w, stri);

		c.x += cc.x;
		c.y += cc.y;
		c.z += cc.z;
		c.w += cc.w;

		if(!hasUnsignedTextureComponent(0)) c.x += c.x;
		if(!hasUnsignedTextureComponent(1)) c.y += c.y;
		if(!hasUnsignedTextureComponent(2)) c.z += c.z;
		if(!hasUnsignedTextureComponent(3)) c.w += c.w;
	}

	return c;
}

Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Vector4i &offset, const Int4 &sample, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
{
	Vector4s c;

	if(state.textureFilter != FILTER_ANISOTROPIC)
	{
		c = sampleQuad(texture, u, v, w, a, offset, sample, lod, secondLOD, function);
	}
	else
	{
		Int N = RoundInt(anisotropy);

		Vector4s cSum;

		cSum.x = Short4(0);
		cSum.y = Short4(0);
		cSum.z = Short4(0);
		cSum.w = Short4(0);

		Float4 A = *Pointer<Float4>(constants + OFFSET(Constants, uvWeight) + 16 * N);
		Float4 B = *Pointer<Float4>(constants + OFFSET(Constants, uvStart) + 16 * N);
		UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants, cWeight) + 8 * N);
		Short4 sw = Short4(cw >> 1);

		Float4 du = uDelta;
		Float4 dv = vDelta;

		Float4 u0 = u + B * du;
		Float4 v0 = v + B * dv;

		du *= A;
		dv *= A;

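		// Informal note: the loop below takes N samples spaced evenly along
		// the major-axis direction (uDelta, vDelta). The lookup tables are
		// presumably arranged so that uvStart positions the first sample at
		// one end of the footprint and uvWeight is the per-sample step
		// (roughly B = -(N - 1) / (2N) and A = 1 / N), with cWeight the same
		// 1/N weight in 0.16 fixed point; the exact values live in
		// Constants.cpp.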
		Int i = 0;

		Do
		{
			c = sampleQuad(texture, u0, v0, w, a, offset, sample, lod, secondLOD, function);

			u0 += du;
			v0 += dv;

			if(hasUnsignedTextureComponent(0))
				cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw));
			else
				cSum.x += MulHigh(c.x, sw);
			if(hasUnsignedTextureComponent(1))
				cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw));
			else
				cSum.y += MulHigh(c.y, sw);
			if(hasUnsignedTextureComponent(2))
				cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw));
			else
				cSum.z += MulHigh(c.z, sw);
			if(hasUnsignedTextureComponent(3))
				cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw));
			else
				cSum.w += MulHigh(c.w, sw);

			i++;
		}
		Until(i >= N);

		if(hasUnsignedTextureComponent(0))
			c.x = cSum.x;
		else
			c.x = AddSat(cSum.x, cSum.x);
		if(hasUnsignedTextureComponent(1))
			c.y = cSum.y;
		else
			c.y = AddSat(cSum.y, cSum.y);
		if(hasUnsignedTextureComponent(2))
			c.z = cSum.z;
		else
			c.z = AddSat(cSum.z, cSum.z);
		if(hasUnsignedTextureComponent(3))
			c.w = cSum.w;
		else
			c.w = AddSat(cSum.w, cSum.w);
	}

	return c;
}

Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Vector4i &offset, const Int4 &sample, Float &lod, bool secondLOD, SamplerFunction function)
{
	if(state.textureType != VK_IMAGE_VIEW_TYPE_3D)
	{
		return sampleQuad2D(texture, u, v, w, a, offset, sample, lod, secondLOD, function);
	}
	else
	{
		return sample3D(texture, u, v, w, offset, sample, lod, secondLOD, function);
	}
}

Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Vector4i &offset, const Int4 &sample, Float &lod, bool secondLOD, SamplerFunction function)
{
	Vector4s c;

	int componentCount = textureComponentCount();
	bool gather = (state.textureFilter == FILTER_GATHER);

	Pointer<Byte> mipmap;
	Pointer<Byte> buffer;
	selectMipmap(texture, mipmap, buffer, lod, secondLOD);

	Short4 uuuu = address(u, state.addressingModeU, mipmap);
	Short4 vvvv = address(v, state.addressingModeV, mipmap);
	Short4 wwww = address(w, state.addressingModeW, mipmap);
	Short4 layerIndex = computeLayerIndex(a, mipmap);

	if(state.textureFilter == FILTER_POINT)
	{
		c = sampleTexel(uuuu, vvvv, wwww, layerIndex, offset, sample, mipmap, buffer, function);
	}
	else
	{
		Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
		Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
		Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
		Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);

		Vector4s c00 = sampleTexel(uuuu0, vvvv0, wwww, layerIndex, offset, sample, mipmap, buffer, function);
		Vector4s c10 = sampleTexel(uuuu1, vvvv0, wwww, layerIndex, offset, sample, mipmap, buffer, function);
		Vector4s c01 = sampleTexel(uuuu0, vvvv1, wwww, layerIndex, offset, sample, mipmap, buffer, function);
		Vector4s c11 = sampleTexel(uuuu1, vvvv1, wwww, layerIndex, offset, sample, mipmap, buffer, function);

		if(!gather)  // Blend
		{
			// Fractions
			UShort4 f0u = As<UShort4>(uuuu0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width)));
			UShort4 f0v = As<UShort4>(vvvv0) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, height)));

			UShort4 f1u = ~f0u;
			UShort4 f1v = ~f0v;
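			// Informal note: the coordinates are normalized 0.16 fixed-point
			// values, so the low half of (u * width), which is what the
			// UShort4 multiply keeps, is the sub-texel fraction in 0.16 form.
			// E.g. u = 0.3 with width = 4 gives u * width = 1.2, whose
			// fractional part 0.2 lands in the low 16 bits. The complement
			// ~f stands in for (1 - f); it is off by one ulp (65535 - f
			// rather than 65536 - f), which is accepted as filtering error.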

			UShort4 f0u0v = MulHigh(f0u, f0v);
			UShort4 f1u0v = MulHigh(f1u, f0v);
			UShort4 f0u1v = MulHigh(f0u, f1v);
			UShort4 f1u1v = MulHigh(f1u, f1v);

			// Signed fractions
			Short4 f1u1vs;
			Short4 f0u1vs;
			Short4 f1u0vs;
			Short4 f0u0vs;

			if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
			{
				f1u1vs = f1u1v >> 1;
				f0u1vs = f0u1v >> 1;
				f1u0vs = f1u0v >> 1;
				f0u0vs = f0u0v >> 1;
			}

			// Bilinear interpolation
			if(componentCount >= 1)
			{
				if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
				{
					c00.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0u) + MulHigh(As<UShort4>(c10.x), f0u);
					c01.x = As<UShort4>(c01.x) - MulHigh(As<UShort4>(c01.x), f0u) + MulHigh(As<UShort4>(c11.x), f0u);
					c.x = As<UShort4>(c00.x) - MulHigh(As<UShort4>(c00.x), f0v) + MulHigh(As<UShort4>(c01.x), f0v);
				}
				else
				{
					if(hasUnsignedTextureComponent(0))
					{
						c00.x = MulHigh(As<UShort4>(c00.x), f1u1v);
						c10.x = MulHigh(As<UShort4>(c10.x), f0u1v);
						c01.x = MulHigh(As<UShort4>(c01.x), f1u0v);
						c11.x = MulHigh(As<UShort4>(c11.x), f0u0v);
					}
					else
					{
						c00.x = MulHigh(c00.x, f1u1vs);
						c10.x = MulHigh(c10.x, f0u1vs);
						c01.x = MulHigh(c01.x, f1u0vs);
						c11.x = MulHigh(c11.x, f0u0vs);
					}

					c.x = (c00.x + c10.x) + (c01.x + c11.x);
					if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x);  // Correct for signed fractions
				}
			}

			if(componentCount >= 2)
			{
				if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
				{
					c00.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0u) + MulHigh(As<UShort4>(c10.y), f0u);
					c01.y = As<UShort4>(c01.y) - MulHigh(As<UShort4>(c01.y), f0u) + MulHigh(As<UShort4>(c11.y), f0u);
					c.y = As<UShort4>(c00.y) - MulHigh(As<UShort4>(c00.y), f0v) + MulHigh(As<UShort4>(c01.y), f0v);
				}
				else
				{
					if(hasUnsignedTextureComponent(1))
					{
						c00.y = MulHigh(As<UShort4>(c00.y), f1u1v);
						c10.y = MulHigh(As<UShort4>(c10.y), f0u1v);
						c01.y = MulHigh(As<UShort4>(c01.y), f1u0v);
						c11.y = MulHigh(As<UShort4>(c11.y), f0u0v);
					}
					else
					{
						c00.y = MulHigh(c00.y, f1u1vs);
						c10.y = MulHigh(c10.y, f0u1vs);
						c01.y = MulHigh(c01.y, f1u0vs);
						c11.y = MulHigh(c11.y, f0u0vs);
					}

					c.y = (c00.y + c10.y) + (c01.y + c11.y);
					if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y);  // Correct for signed fractions
				}
			}

			if(componentCount >= 3)
			{
				if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
				{
					c00.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0u) + MulHigh(As<UShort4>(c10.z), f0u);
					c01.z = As<UShort4>(c01.z) - MulHigh(As<UShort4>(c01.z), f0u) + MulHigh(As<UShort4>(c11.z), f0u);
					c.z = As<UShort4>(c00.z) - MulHigh(As<UShort4>(c00.z), f0v) + MulHigh(As<UShort4>(c01.z), f0v);
				}
				else
				{
					if(hasUnsignedTextureComponent(2))
					{
						c00.z = MulHigh(As<UShort4>(c00.z), f1u1v);
						c10.z = MulHigh(As<UShort4>(c10.z), f0u1v);
						c01.z = MulHigh(As<UShort4>(c01.z), f1u0v);
						c11.z = MulHigh(As<UShort4>(c11.z), f0u0v);
					}
					else
					{
						c00.z = MulHigh(c00.z, f1u1vs);
						c10.z = MulHigh(c10.z, f0u1vs);
						c01.z = MulHigh(c01.z, f1u0vs);
						c11.z = MulHigh(c11.z, f0u0vs);
					}

					c.z = (c00.z + c10.z) + (c01.z + c11.z);
					if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z);  // Correct for signed fractions
				}
			}

			if(componentCount >= 4)
			{
				if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
				{
					c00.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0u) + MulHigh(As<UShort4>(c10.w), f0u);
					c01.w = As<UShort4>(c01.w) - MulHigh(As<UShort4>(c01.w), f0u) + MulHigh(As<UShort4>(c11.w), f0u);
					c.w = As<UShort4>(c00.w) - MulHigh(As<UShort4>(c00.w), f0v) + MulHigh(As<UShort4>(c01.w), f0v);
				}
				else
				{
					if(hasUnsignedTextureComponent(3))
					{
						c00.w = MulHigh(As<UShort4>(c00.w), f1u1v);
						c10.w = MulHigh(As<UShort4>(c10.w), f0u1v);
						c01.w = MulHigh(As<UShort4>(c01.w), f1u0v);
						c11.w = MulHigh(As<UShort4>(c11.w), f0u0v);
					}
					else
					{
						c00.w = MulHigh(c00.w, f1u1vs);
						c10.w = MulHigh(c10.w, f0u1vs);
						c01.w = MulHigh(c01.w, f1u0vs);
						c11.w = MulHigh(c11.w, f0u0vs);
					}

					c.w = (c00.w + c10.w) + (c01.w + c11.w);
					if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w);  // Correct for signed fractions
				}
			}
		}
		else  // Gather
		{
			VkComponentSwizzle swizzle = gatherSwizzle();
			switch(swizzle)
			{
			case VK_COMPONENT_SWIZZLE_ZERO:
			case VK_COMPONENT_SWIZZLE_ONE:
				// Handled at the final component swizzle.
				break;
			default:
				c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
				c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
				c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
				c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
				break;
			}
		}
	}

	return c;
}

Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4i &offset, const Int4 &sample, Float &lod, bool secondLOD, SamplerFunction function)
{
	Vector4s c_;

	int componentCount = textureComponentCount();

	Pointer<Byte> mipmap;
	Pointer<Byte> buffer;
	selectMipmap(texture, mipmap, buffer, lod, secondLOD);

	Short4 uuuu = address(u_, state.addressingModeU, mipmap);
	Short4 vvvv = address(v_, state.addressingModeV, mipmap);
	Short4 wwww = address(w_, state.addressingModeW, mipmap);

	if(state.textureFilter == FILTER_POINT)
	{
		c_ = sampleTexel(uuuu, vvvv, wwww, 0, offset, sample, mipmap, buffer, function);
	}
	else
	{
		Vector4s c[2][2][2];

		Short4 u[2][2][2];
		Short4 v[2][2][2];
		Short4 s[2][2][2];

		for(int i = 0; i < 2; i++)
		{
			for(int j = 0; j < 2; j++)
			{
				for(int k = 0; k < 2; k++)
				{
					u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap, uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
					v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap, vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
					s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap, wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
				}
			}
		}

		// Fractions
		UShort4 f0u = As<UShort4>(u[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width)));
		UShort4 f0v = As<UShort4>(v[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, height)));
		UShort4 f0s = As<UShort4>(s[0][0][0]) * UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, depth)));

		UShort4 f1u = ~f0u;
		UShort4 f1v = ~f0v;
		UShort4 f1s = ~f0s;
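		// Informal note: the eight corner weights built below are the
		// per-axis fraction products f(u) * f(v) * f(w), i.e. standard
		// trilinear weights in 0.16 fixed point. f[i][j][k] holds the weight
		// of the corner *opposite* sample (i, j, k), which is why the
		// accumulation loop further down indexes f[1 - i][1 - j][1 - k].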

		UShort4 f[2][2][2];
		Short4 fs[2][2][2];

		f[1][1][1] = MulHigh(f1u, f1v);
		f[0][1][1] = MulHigh(f0u, f1v);
		f[1][0][1] = MulHigh(f1u, f0v);
		f[0][0][1] = MulHigh(f0u, f0v);
		f[1][1][0] = MulHigh(f1u, f1v);
		f[0][1][0] = MulHigh(f0u, f1v);
		f[1][0][0] = MulHigh(f1u, f0v);
		f[0][0][0] = MulHigh(f0u, f0v);

		f[1][1][1] = MulHigh(f[1][1][1], f1s);
		f[0][1][1] = MulHigh(f[0][1][1], f1s);
		f[1][0][1] = MulHigh(f[1][0][1], f1s);
		f[0][0][1] = MulHigh(f[0][0][1], f1s);
		f[1][1][0] = MulHigh(f[1][1][0], f0s);
		f[0][1][0] = MulHigh(f[0][1][0], f0s);
		f[1][0][0] = MulHigh(f[1][0][0], f0s);
		f[0][0][0] = MulHigh(f[0][0][0], f0s);

		// Signed fractions
		if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
		{
			fs[0][0][0] = f[0][0][0] >> 1;
			fs[0][0][1] = f[0][0][1] >> 1;
			fs[0][1][0] = f[0][1][0] >> 1;
			fs[0][1][1] = f[0][1][1] >> 1;
			fs[1][0][0] = f[1][0][0] >> 1;
			fs[1][0][1] = f[1][0][1] >> 1;
			fs[1][1][0] = f[1][1][0] >> 1;
			fs[1][1][1] = f[1][1][1] >> 1;
		}

		for(int i = 0; i < 2; i++)
		{
			for(int j = 0; j < 2; j++)
			{
				for(int k = 0; k < 2; k++)
				{
					c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], 0, offset, sample, mipmap, buffer, function);

					if(componentCount >= 1)
					{
						if(hasUnsignedTextureComponent(0))
							c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]);
						else
							c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]);
					}
					if(componentCount >= 2)
					{
						if(hasUnsignedTextureComponent(1))
							c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]);
						else
							c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]);
					}
					if(componentCount >= 3)
					{
						if(hasUnsignedTextureComponent(2))
							c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]);
						else
							c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]);
					}
					if(componentCount >= 4)
					{
						if(hasUnsignedTextureComponent(3))
							c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]);
						else
							c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]);
					}

					if(i != 0 || j != 0 || k != 0)
					{
						if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x;
						if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y;
						if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z;
						if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w;
					}
				}
			}
		}

		if(componentCount >= 1) c_.x = c[0][0][0].x;
		if(componentCount >= 2) c_.y = c[0][0][0].y;
		if(componentCount >= 3) c_.z = c[0][0][0].z;
		if(componentCount >= 4) c_.w = c[0][0][0].w;

		// Correct for signed fractions
		if(componentCount >= 1)
			if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
		if(componentCount >= 2)
			if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
		if(componentCount >= 3)
			if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
		if(componentCount >= 4)
			if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
	}

	return c_;
}

Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Float4 &dRef, Vector4i &offset, const Int4 &sample, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, SamplerFunction function)
{
	Vector4f c = sampleFloatAniso(texture, u, v, w, a, dRef, offset, sample, lod, anisotropy, uDelta, vDelta, false, function);

	if(function == Fetch)
	{
		return c;
	}

	if(state.mipmapFilter == MIPMAP_LINEAR)
	{
		Vector4f cc = sampleFloatAniso(texture, u, v, w, a, dRef, offset, sample, lod, anisotropy, uDelta, vDelta, true, function);

		Float4 lod4 = Float4(Frac(lod));
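		// Informal note: for MIPMAP_LINEAR the integer part of `lod` selects
		// the mip pair and the fractional part is the blend factor, e.g.
		// lod = 2.75 blends levels 2 and 3 with weights 0.25 and 0.75.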

		c.x = (cc.x - c.x) * lod4 + c.x;
		c.y = (cc.y - c.y) * lod4 + c.y;
		c.z = (cc.z - c.z) * lod4 + c.z;
		c.w = (cc.w - c.w) * lod4 + c.w;
	}

	return c;
}

Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Float4 &dRef, Vector4i &offset, const Int4 &sample, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, bool secondLOD, SamplerFunction function)
{
	Vector4f c;

	if(state.textureFilter != FILTER_ANISOTROPIC)
	{
		c = sampleFloat(texture, u, v, w, a, dRef, offset, sample, lod, secondLOD, function);
	}
	else
	{
		Int N = RoundInt(anisotropy);

		Vector4f cSum;

		cSum.x = Float4(0.0f);
		cSum.y = Float4(0.0f);
		cSum.z = Float4(0.0f);
		cSum.w = Float4(0.0f);

		Float4 A = *Pointer<Float4>(constants + OFFSET(Constants, uvWeight) + 16 * N);
		Float4 B = *Pointer<Float4>(constants + OFFSET(Constants, uvStart) + 16 * N);

		Float4 du = uDelta;
		Float4 dv = vDelta;

		Float4 u0 = u + B * du;
		Float4 v0 = v + B * dv;

		du *= A;
		dv *= A;

		Int i = 0;

		Do
		{
			c = sampleFloat(texture, u0, v0, w, a, dRef, offset, sample, lod, secondLOD, function);

			u0 += du;
			v0 += dv;

			cSum.x += c.x * A;
			cSum.y += c.y * A;
			cSum.z += c.z * A;
			cSum.w += c.w * A;

			i++;
		}
		Until(i >= N);

		c.x = cSum.x;
		c.y = cSum.y;
		c.z = cSum.z;
		c.w = cSum.w;
	}

	return c;
}

Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Float4 &dRef, Vector4i &offset, const Int4 &sample, Float &lod, bool secondLOD, SamplerFunction function)
{
	if(state.textureType != VK_IMAGE_VIEW_TYPE_3D)
	{
		return sampleFloat2D(texture, u, v, w, a, dRef, offset, sample, lod, secondLOD, function);
	}
	else
	{
		return sampleFloat3D(texture, u, v, w, dRef, offset, sample, lod, secondLOD, function);
	}
}

Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, const Float4 &a, Float4 &dRef, Vector4i &offset, const Int4 &sample, Float &lod, bool secondLOD, SamplerFunction function)
{
	Vector4f c;

	int componentCount = textureComponentCount();
	bool gather = (state.textureFilter == FILTER_GATHER);

	Pointer<Byte> mipmap;
	Pointer<Byte> buffer;
	selectMipmap(texture, mipmap, buffer, lod, secondLOD);

	Int4 x0, x1, y0, y1;
	Float4 fu, fv;
	Int4 filter = computeFilterOffset(lod);
	address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
	address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);

	Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
	y0 *= pitchP;

	Int4 z;
	if(state.isCube() || state.isArrayed())
	{
		Int4 face = As<Int4>(w);
		Int4 layerIndex = computeLayerIndex(a, mipmap, function);

		// For cube maps, the layer argument is per cube, each of which has 6 layers
		if(state.textureType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		{
			layerIndex *= Int4(6);
		}

		z = state.isCube() ? face : layerIndex;

		if(state.textureType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		{
			z += layerIndex;
		}

		z *= *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
	}
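	// Informal example of the slice computation above: for a cube array,
	// `w` holds the face (0..5) and the layer argument counts whole cubes,
	// so cube 2, face 3 selects slice 2 * 6 + 3 = 15, which is then scaled
	// by sliceP into a byte offset.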

	if(state.textureFilter == FILTER_POINT || (function == Fetch))
	{
		c = sampleTexel(x0, y0, z, dRef, sample, mipmap, buffer, function);
	}
	else
	{
		y1 *= pitchP;

		Vector4f c00 = sampleTexel(x0, y0, z, dRef, sample, mipmap, buffer, function);
		Vector4f c10 = sampleTexel(x1, y0, z, dRef, sample, mipmap, buffer, function);
		Vector4f c01 = sampleTexel(x0, y1, z, dRef, sample, mipmap, buffer, function);
		Vector4f c11 = sampleTexel(x1, y1, z, dRef, sample, mipmap, buffer, function);

		if(!gather)  // Blend
		{
			if(componentCount >= 1) c00.x = c00.x + fu * (c10.x - c00.x);
			if(componentCount >= 2) c00.y = c00.y + fu * (c10.y - c00.y);
			if(componentCount >= 3) c00.z = c00.z + fu * (c10.z - c00.z);
			if(componentCount >= 4) c00.w = c00.w + fu * (c10.w - c00.w);

			if(componentCount >= 1) c01.x = c01.x + fu * (c11.x - c01.x);
			if(componentCount >= 2) c01.y = c01.y + fu * (c11.y - c01.y);
			if(componentCount >= 3) c01.z = c01.z + fu * (c11.z - c01.z);
			if(componentCount >= 4) c01.w = c01.w + fu * (c11.w - c01.w);

			if(componentCount >= 1) c.x = c00.x + fv * (c01.x - c00.x);
			if(componentCount >= 2) c.y = c00.y + fv * (c01.y - c00.y);
			if(componentCount >= 3) c.z = c00.z + fv * (c01.z - c00.z);
			if(componentCount >= 4) c.w = c00.w + fv * (c01.w - c00.w);
		}
		else  // Gather
		{
			VkComponentSwizzle swizzle = gatherSwizzle();
			switch(swizzle)
			{
			case VK_COMPONENT_SWIZZLE_ZERO:
			case VK_COMPONENT_SWIZZLE_ONE:
				// Handled at the final component swizzle.
				break;
			default:
				c.x = c01[swizzle - VK_COMPONENT_SWIZZLE_R];
				c.y = c11[swizzle - VK_COMPONENT_SWIZZLE_R];
				c.z = c10[swizzle - VK_COMPONENT_SWIZZLE_R];
				c.w = c00[swizzle - VK_COMPONENT_SWIZZLE_R];
				break;
			}
		}
	}

	return c;
}

Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &dRef, Vector4i &offset, const Int4 &sample, Float &lod, bool secondLOD, SamplerFunction function)
{
	Vector4f c;

	int componentCount = textureComponentCount();

	Pointer<Byte> mipmap;
	Pointer<Byte> buffer;
	selectMipmap(texture, mipmap, buffer, lod, secondLOD);

	Int4 x0, x1, y0, y1, z0, z1;
	Float4 fu, fv, fw;
	Int4 filter = computeFilterOffset(lod);
	address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
	address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
	address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);

	Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
	Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
	y0 *= pitchP;
	z0 *= sliceP;

	if(state.textureFilter == FILTER_POINT || (function == Fetch))
	{
		c = sampleTexel(x0, y0, z0, dRef, sample, mipmap, buffer, function);
	}
	else
	{
		y1 *= pitchP;
		z1 *= sliceP;

		Vector4f c000 = sampleTexel(x0, y0, z0, dRef, sample, mipmap, buffer, function);
		Vector4f c100 = sampleTexel(x1, y0, z0, dRef, sample, mipmap, buffer, function);
		Vector4f c010 = sampleTexel(x0, y1, z0, dRef, sample, mipmap, buffer, function);
		Vector4f c110 = sampleTexel(x1, y1, z0, dRef, sample, mipmap, buffer, function);
		Vector4f c001 = sampleTexel(x0, y0, z1, dRef, sample, mipmap, buffer, function);
		Vector4f c101 = sampleTexel(x1, y0, z1, dRef, sample, mipmap, buffer, function);
		Vector4f c011 = sampleTexel(x0, y1, z1, dRef, sample, mipmap, buffer, function);
		Vector4f c111 = sampleTexel(x1, y1, z1, dRef, sample, mipmap, buffer, function);

		// Blend first slice
		if(componentCount >= 1) c000.x = c000.x + fu * (c100.x - c000.x);
		if(componentCount >= 2) c000.y = c000.y + fu * (c100.y - c000.y);
		if(componentCount >= 3) c000.z = c000.z + fu * (c100.z - c000.z);
		if(componentCount >= 4) c000.w = c000.w + fu * (c100.w - c000.w);

		if(componentCount >= 1) c010.x = c010.x + fu * (c110.x - c010.x);
		if(componentCount >= 2) c010.y = c010.y + fu * (c110.y - c010.y);
		if(componentCount >= 3) c010.z = c010.z + fu * (c110.z - c010.z);
		if(componentCount >= 4) c010.w = c010.w + fu * (c110.w - c010.w);

		if(componentCount >= 1) c000.x = c000.x + fv * (c010.x - c000.x);
		if(componentCount >= 2) c000.y = c000.y + fv * (c010.y - c000.y);
		if(componentCount >= 3) c000.z = c000.z + fv * (c010.z - c000.z);
		if(componentCount >= 4) c000.w = c000.w + fv * (c010.w - c000.w);

		// Blend second slice
		if(componentCount >= 1) c001.x = c001.x + fu * (c101.x - c001.x);
		if(componentCount >= 2) c001.y = c001.y + fu * (c101.y - c001.y);
		if(componentCount >= 3) c001.z = c001.z + fu * (c101.z - c001.z);
		if(componentCount >= 4) c001.w = c001.w + fu * (c101.w - c001.w);

		if(componentCount >= 1) c011.x = c011.x + fu * (c111.x - c011.x);
		if(componentCount >= 2) c011.y = c011.y + fu * (c111.y - c011.y);
		if(componentCount >= 3) c011.z = c011.z + fu * (c111.z - c011.z);
		if(componentCount >= 4) c011.w = c011.w + fu * (c111.w - c011.w);

		if(componentCount >= 1) c001.x = c001.x + fv * (c011.x - c001.x);
		if(componentCount >= 2) c001.y = c001.y + fv * (c011.y - c001.y);
		if(componentCount >= 3) c001.z = c001.z + fv * (c011.z - c001.z);
		if(componentCount >= 4) c001.w = c001.w + fv * (c011.w - c001.w);

		// Blend slices
		if(componentCount >= 1) c.x = c000.x + fw * (c001.x - c000.x);
		if(componentCount >= 2) c.y = c000.y + fw * (c001.y - c000.y);
		if(componentCount >= 3) c.z = c000.z + fw * (c001.z - c000.z);
		if(componentCount >= 4) c.w = c000.w + fw * (c001.w - c000.w);
	}

	return c;
}

static Float log2sqrt(Float lod)
{
	// log2(sqrt(lod))  // Equals 0.25 * log2(lod^2).
	lod *= lod;                                     // Squaring doubles the exponent and produces an extra bit of precision.
	lod = Float(As<Int>(lod)) - Float(0x3F800000);  // Interpret as integer and subtract the exponent bias.
	lod *= As<Float>(Int(0x33000000));              // Scale by 0.25 * 2^-23 (mantissa length).

	return lod;
}

static Float log2(Float lod)
{
	lod *= lod;                                     // Squaring doubles the exponent and produces an extra bit of precision.
	lod = Float(As<Int>(lod)) - Float(0x3F800000);  // Interpret as integer and subtract the exponent bias.
	lod *= As<Float>(Int(0x33800000));              // Scale by 0.5 * 2^-23 (mantissa length).

	return lod;
}
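
// Informal scalar sketch of the bit trick above (illustrative only, not part
// of the build; the hypothetical helper name log2_approx is ours): for
// x = 2^e * m with m in [1, 2), the bit pattern of x read as an integer is
// (e + 127) * 2^23 plus the mantissa bits, i.e. approximately
// 2^23 * (log2(x) + 127), with the usual piecewise-linear error. In plain C++:
//
//   float log2_approx(float x)
//   {
//       int i;
//       std::memcpy(&i, &x, sizeof(i));                   // reinterpret the bits
//       return (i - 0x3F800000) * (1.0f / (1 << 23));     // remove bias, rescale
//   }
//
// log2sqrt() and log2() first square the argument, which doubles the exponent
// and shifts one mantissa bit into it, gaining a bit of precision; the final
// scale factors 0.25 * 2^-23 and 0.5 * 2^-23 undo that squaring.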

void SamplerCore::computeLod1D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &dsx, Float4 &dsy, SamplerFunction function)
{
	Float4 dudxy;

	if(function != Grad)  // Implicit
	{
		dudxy = uuuu.yz - uuuu.xx;
	}
	else
	{
		dudxy = UnpackLow(dsx, dsy);
	}

	// Scale by texture dimensions.
	Float4 dUdxy = dudxy * *Pointer<Float4>(texture + OFFSET(Texture, widthWidthHeightHeight));

	// Note we could take the absolute value here and omit the square root below,
	// but this is more consistent with the 2D calculation and still cheap.
	Float4 dU2dxy = dUdxy * dUdxy;

	lod = Max(Float(dU2dxy.x), Float(dU2dxy.y));
	lod = log2sqrt(lod);
}

void SamplerCore::computeLod2D(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, Float4 &dsx, Float4 &dsy, SamplerFunction function)
{
	Float4 duvdxy;

	if(function != Grad)  // Implicit
	{
		duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
	}
	else
	{
		Float4 dudxy = Float4(dsx.xx, dsy.xx);
		Float4 dvdxy = Float4(dsx.yy, dsy.yy);

		duvdxy = Float4(dudxy.xz, dvdxy.xz);
	}

	// Scale by texture dimensions.
	Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture, widthWidthHeightHeight));

	Float4 dUV2dxy = dUVdxy * dUVdxy;
	Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;

	lod = Max(Float(dUV2.x), Float(dUV2.y));  // Square length of major axis
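	// Informal note on the anisotropy computation below: |det| of the
	// Jacobian [dUdx dUdy; dVdx dVdy] is the area of the pixel footprint
	// parallelogram, which equals (major axis) * (minor axis). Dividing the
	// squared major-axis length by that area therefore yields major / minor,
	// the anisotropy ratio; e.g. a 4x1 footprint gives anisotropy 4.
	// Dividing lod by anisotropy^2 then bases the mip level on the minor
	// axis instead of the major one.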

	if(state.textureFilter == FILTER_ANISOTROPIC)
	{
		Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z));

		Float4 dudx = duvdxy.xxxx;
		Float4 dudy = duvdxy.yyyy;
		Float4 dvdx = duvdxy.zzzz;
		Float4 dvdy = duvdxy.wwww;

		Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y));
		uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask)));
		vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));

		anisotropy = lod * Rcp(det, Precision::Relaxed);
		anisotropy = Min(anisotropy, state.maxAnisotropy);

		// TODO(b/151263485): While we always need `lod` above, when there's only
		// a single mipmap level the following calculations could be skipped.
		lod *= Rcp(anisotropy * anisotropy, Precision::Relaxed);
	}

	lod = log2sqrt(lod);  // log2(sqrt(lod))
}

void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, Float4 &dsx, Float4 &dsy, Float4 &M, SamplerFunction function)
{
	Float4 dudxy, dvdxy, dsdxy;

	if(function != Grad)  // Implicit
	{
		Float4 U = u * M;
		Float4 V = v * M;
		Float4 W = w * M;

		dudxy = Abs(U - U.xxxx);
		dvdxy = Abs(V - V.xxxx);
		dsdxy = Abs(W - W.xxxx);
	}
	else
	{
		dudxy = Float4(dsx.xx, dsy.xx);
		dvdxy = Float4(dsx.yy, dsy.yy);
		dsdxy = Float4(dsx.zz, dsy.zz);

		dudxy = Abs(dudxy * Float4(M.x));
		dvdxy = Abs(dvdxy * Float4(M.x));
		dsdxy = Abs(dsdxy * Float4(M.x));
	}

	// Compute the largest Manhattan distance in two dimensions.
	// This takes the footprint across adjacent faces into account.
	Float4 duvdxy = dudxy + dvdxy;
	Float4 dusdxy = dudxy + dsdxy;
	Float4 dvsdxy = dvdxy + dsdxy;

	dudxy = Max(Max(duvdxy, dusdxy), dvsdxy);

	lod = Max(Float(dudxy.y), Float(dudxy.z));  // FIXME: Max(dudxy.y, dudxy.z);

	// Scale by texture dimension.
	lod *= *Pointer<Float>(texture + OFFSET(Texture, width));

	lod = log2(lod);
}

void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, Float4 &dsx, Float4 &dsy, SamplerFunction function)
{
	Float4 dudxy, dvdxy, dsdxy;

	if(function != Grad)  // Implicit
	{
		dudxy = uuuu - uuuu.xxxx;
		dvdxy = vvvv - vvvv.xxxx;
		dsdxy = wwww - wwww.xxxx;
	}
	else
	{
		dudxy = Float4(dsx.xx, dsy.xx);
		dvdxy = Float4(dsx.yy, dsy.yy);
		dsdxy = Float4(dsx.zz, dsy.zz);
	}

	// Scale by texture dimensions.
	dudxy *= *Pointer<Float4>(texture + OFFSET(Texture, width));
	dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture, height));
	dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture, depth));

	dudxy *= dudxy;
	dvdxy *= dvdxy;
	dsdxy *= dsdxy;

	dudxy += dvdxy;
	dudxy += dsdxy;

	lod = Max(Float(dudxy.y), Float(dudxy.z));  // FIXME: Max(dudxy.y, dudxy.z);

	lod = log2sqrt(lod);  // log2(sqrt(lod))
}

Int4 SamplerCore::cubeFace(Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
{
	// TODO: Comply with Vulkan recommendation:
	// Vulkan 1.1: "The rules should have as the first rule that rz wins over ry and rx, and the second rule that ry wins over rx."

	Int4 xn = CmpLT(x, Float4(0.0f));  // x < 0
	Int4 yn = CmpLT(y, Float4(0.0f));  // y < 0
	Int4 zn = CmpLT(z, Float4(0.0f));  // z < 0

	Float4 absX = Abs(x);
	Float4 absY = Abs(y);
	Float4 absZ = Abs(z);

	Int4 xy = CmpNLE(absX, absY);  // abs(x) > abs(y)
	Int4 yz = CmpNLE(absY, absZ);  // abs(y) > abs(z)
	Int4 zx = CmpNLE(absZ, absX);  // abs(z) > abs(x)
	Int4 xMajor = xy & ~zx;        // abs(x) > abs(y) && abs(x) > abs(z)
	Int4 yMajor = yz & ~xy;        // abs(y) > abs(z) && abs(y) > abs(x)
	Int4 zMajor = zx & ~yz;        // abs(z) > abs(x) && abs(z) > abs(y)

	// FACE_POSITIVE_X = 000b
	// FACE_NEGATIVE_X = 001b
	// FACE_POSITIVE_Y = 010b
	// FACE_NEGATIVE_Y = 011b
	// FACE_POSITIVE_Z = 100b
	// FACE_NEGATIVE_Z = 101b
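	// Informal worked example of this encoding: for the direction
	// (0.5, -0.8, 0.3), |y| is the major axis and y < 0, so the face is
	// FACE_NEGATIVE_Y = 011b. Bit 0 is the sign of the major component and
	// bits 1..2 select the axis, which is what the transposeBit0..2 lookups
	// below assemble for each of the four lanes at once.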

	Int yAxis = SignMask(yMajor);
	Int zAxis = SignMask(zMajor);

	Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
	Int negative = SignMask(n);

	Int faces = *Pointer<Int>(constants + OFFSET(Constants, transposeBit0) + negative * 4);
	faces |= *Pointer<Int>(constants + OFFSET(Constants, transposeBit1) + yAxis * 4);
	faces |= *Pointer<Int>(constants + OFFSET(Constants, transposeBit2) + zAxis * 4);

	Int4 face;
	face.x = faces & 0x7;
	face.y = (faces >> 4) & 0x7;
	face.z = (faces >> 8) & 0x7;
	face.w = (faces >> 12) & 0x7;

	M = Max(Max(absX, absY), absZ);

	// U = xMajor ? (neg ^ -z) : ((zMajor & neg) ^ x)
	U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x))));

	// V = !yMajor ? -y : (n ^ z)
	V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z))));

	M = reciprocal(M) * Float4(0.5f);
	U = U * M + Float4(0.5f);
	V = V * M + Float4(0.5f);

	return face;
}

Short4 SamplerCore::applyOffset(Short4 &uvw, Int4 &offset, const Int4 &whd, AddressingMode mode)
{
	Int4 tmp = Int4(As<UShort4>(uvw));
	tmp = tmp + offset;
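	// Informal note on the wrap case below: `offset` may be as negative as
	// MIN_TEXEL_OFFSET, so whd * -MIN_TEXEL_OFFSET is added first to keep the
	// dividend non-negative, making `%` behave as a true modulo wrap. E.g.
	// with MIN_TEXEL_OFFSET = -8 (the Vulkan required minimum), whd = 8 and
	// tmp = -3: (-3 + 64) % 8 = 5, which is -3 mod 8.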

	switch(mode)
	{
	case AddressingMode::ADDRESSING_WRAP:
		tmp = (tmp + whd * Int4(-MIN_TEXEL_OFFSET)) % whd;
		break;
	case AddressingMode::ADDRESSING_CLAMP:
	case AddressingMode::ADDRESSING_MIRROR:
	case AddressingMode::ADDRESSING_MIRRORONCE:
	case AddressingMode::ADDRESSING_BORDER:  // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
		tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
		break;
	case AddressingMode::ADDRESSING_SEAMLESS:
		ASSERT(false);  // Cube sampling doesn't support offset.
	default:
		ASSERT(false);
	}

	return As<Short4>(UShort4(tmp));
}

void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, const Short4 &layerIndex, Vector4i &offset, const Int4 &sample, const Pointer<Byte> &mipmap, SamplerFunction function)
{
	uuuu = MulHigh(As<UShort4>(uuuu), UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, width))));

	if(function.offset)
	{
		uuuu = applyOffset(uuuu, offset.x, *Pointer<Int4>(mipmap + OFFSET(Mipmap, width)), state.addressingModeU);
	}

	UInt4 indices = Int4(uuuu);

	if(state.is2D() || state.is3D() || state.isCube())
	{
		vvvv = MulHigh(As<UShort4>(vvvv), UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, height))));

		if(function.offset)
		{
			vvvv = applyOffset(vvvv, offset.y, *Pointer<Int4>(mipmap + OFFSET(Mipmap, height)), state.addressingModeV);
		}

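		// Informal note: interleaving u with v and multiplying by the
		// (1, pitchP) pairs in onePitchP lets one MulAdd compute
		// u * 1 + v * pitchP per texel, i.e. the linear texel index within
		// the slice. E.g. u = 3, v = 2, pitchP = 16 gives 3 + 2 * 16 = 35.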
		Short4 uv0uv1 = As<Short4>(UnpackLow(uuuu, vvvv));
		Short4 uv2uv3 = As<Short4>(UnpackHigh(uuuu, vvvv));
		Int2 i01 = MulAdd(uv0uv1, *Pointer<Short4>(mipmap + OFFSET(Mipmap, onePitchP)));
		Int2 i23 = MulAdd(uv2uv3, *Pointer<Short4>(mipmap + OFFSET(Mipmap, onePitchP)));

		indices = UInt4(As<UInt2>(i01), As<UInt2>(i23));
	}

	if(state.is3D())
	{
		wwww = MulHigh(As<UShort4>(wwww), UShort4(*Pointer<Int4>(mipmap + OFFSET(Mipmap, depth))));

		if(function.offset)
		{
			wwww = applyOffset(wwww, offset.z, *Pointer<Int4>(mipmap + OFFSET(Mipmap, depth)), state.addressingModeW);
		}

		indices += As<UInt4>(Int4(As<UShort4>(wwww))) * *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sliceP));
	}

	if(state.isArrayed())
	{
		Int4 layer = Int4(As<UShort4>(layerIndex));

		if(state.textureType == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
		{
			layer *= Int4(6);
		}

		UInt4 layerOffset = As<UInt4>(layer) * *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sliceP));

		indices += layerOffset;
	}

	if(function.sample)
	{
		UInt4 sampleOffset = Min(As<UInt4>(sample), *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sampleMax), 16)) *
		                     *Pointer<UInt4>(mipmap + OFFSET(Mipmap, samplePitchP), 16);
		indices += sampleOffset;
	}

	index[0] = Extract(indices, 0);
	index[1] = Extract(indices, 1);
	index[2] = Extract(indices, 2);
	index[3] = Extract(indices, 3);
}

void SamplerCore::computeIndices(UInt index[4], Int4 uuuu, Int4 vvvv, Int4 wwww, const Int4 &sample, Int4 valid, const Pointer<Byte> &mipmap, SamplerFunction function)
{
	UInt4 indices = uuuu;

	if(state.is2D() || state.is3D() || state.isCube())
	{
		indices += As<UInt4>(vvvv);
	}

	if(state.is3D() || state.isCube() || state.isArrayed())
	{
		indices += As<UInt4>(wwww);
	}

	if(function.sample)
	{
		indices += Min(As<UInt4>(sample), *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sampleMax), 16)) *
		           *Pointer<UInt4>(mipmap + OFFSET(Mipmap, samplePitchP), 16);
	}

	if(borderModeActive())
	{
		// Texels out of range are still sampled before being replaced
		// with the border color, so sample them at linear index 0.
		indices &= As<UInt4>(valid);
	}

	for(int i = 0; i < 4; i++)
	{
		index[i] = Extract(As<Int4>(indices), i);
	}
}

Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer)
{
	Vector4s c;

	if(has16bitPackedTextureFormat())
	{
		c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
		c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
		c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
		c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);

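		// Informal note on the unpacking below: each field is shifted so its
		// most significant bit lands at bit 15 of the 16-bit lane, with the
		// low bits zero-filled. E.g. for R5G6B5, red already occupies bits
		// 15..11, green (bits 10..5) is shifted left by 5, and blue
		// (bits 4..0) by 11. The per-component scale applied later in
		// sampleTexture() (via getComponentScale()) turns these into
		// normalized values.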
1431 switch(state.textureFormat)
1432 {
1433 case VK_FORMAT_R5G6B5_UNORM_PACK16:
1434 c.z = (c.x & Short4(0x001Fu)) << 11;
1435 c.y = (c.x & Short4(0x07E0u)) << 5;
1436 c.x = (c.x & Short4(0xF800u));
1437 break;
1438 case VK_FORMAT_B5G6R5_UNORM_PACK16:
1439 c.z = (c.x & Short4(0xF800u));
1440 c.y = (c.x & Short4(0x07E0u)) << 5;
1441 c.x = (c.x & Short4(0x001Fu)) << 11;
1442 break;
1443 case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1444 c.w = (c.x << 12) & Short4(0xF000u);
1445 c.z = (c.x << 8) & Short4(0xF000u);
1446 c.y = (c.x << 4) & Short4(0xF000u);
1447 c.x = (c.x) & Short4(0xF000u);
1448 break;
1449 case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1450 c.w = (c.x << 12) & Short4(0xF000u);
1451 c.z = (c.x) & Short4(0xF000u);
1452 c.y = (c.x << 4) & Short4(0xF000u);
1453 c.x = (c.x << 8) & Short4(0xF000u);
1454 break;
1455 case VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT:
1456 c.w = (c.x) & Short4(0xF000u);
1457 c.z = (c.x << 12) & Short4(0xF000u);
1458 c.y = (c.x << 8) & Short4(0xF000u);
1459 c.x = (c.x << 4) & Short4(0xF000u);
1460 break;
1461 case VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT:
1462 c.w = (c.x) & Short4(0xF000u);
1463 c.z = (c.x << 4) & Short4(0xF000u);
1464 c.y = (c.x << 8) & Short4(0xF000u);
1465 c.x = (c.x << 12) & Short4(0xF000u);
1466 break;
1467 case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1468 c.w = (c.x << 15) & Short4(0x8000u);
1469 c.z = (c.x << 10) & Short4(0xF800u);
1470 c.y = (c.x << 5) & Short4(0xF800u);
1471 c.x = (c.x) & Short4(0xF800u);
1472 break;
1473 case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1474 c.w = (c.x << 15) & Short4(0x8000u);
1475 c.z = (c.x) & Short4(0xF800u);
1476 c.y = (c.x << 5) & Short4(0xF800u);
1477 c.x = (c.x << 10) & Short4(0xF800u);
1478 break;
1479 case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1480 c.w = (c.x) & Short4(0x8000u);
1481 c.z = (c.x << 11) & Short4(0xF800u);
1482 c.y = (c.x << 6) & Short4(0xF800u);
1483 c.x = (c.x << 1) & Short4(0xF800u);
1484 break;
1485 default:
1486 ASSERT(false);
1487 }
1488 }
1489 else if(has8bitTextureComponents())
1490 {
1491 switch(textureComponentCount())
1492 {
1493 case 4:
1494 {
1495 Byte4 c0 = Pointer<Byte4>(buffer)[index[0]];
1496 Byte4 c1 = Pointer<Byte4>(buffer)[index[1]];
1497 Byte4 c2 = Pointer<Byte4>(buffer)[index[2]];
1498 Byte4 c3 = Pointer<Byte4>(buffer)[index[3]];
1499 c.x = Unpack(c0, c1);
1500 c.y = Unpack(c2, c3);
1501
1502 switch(state.textureFormat)
1503 {
1504 case VK_FORMAT_B8G8R8A8_UNORM:
1505 case VK_FORMAT_B8G8R8A8_SRGB:
1506 c.z = As<Short4>(UnpackLow(c.x, c.y));
1507 c.x = As<Short4>(UnpackHigh(c.x, c.y));
1508 c.y = c.z;
1509 c.w = c.x;
1510 c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
1511 c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
1512 c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
1513 c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
1514 break;
1515 case VK_FORMAT_R8G8B8A8_UNORM:
1516 case VK_FORMAT_R8G8B8A8_SNORM:
1517 case VK_FORMAT_R8G8B8A8_SINT:
1518 case VK_FORMAT_R8G8B8A8_SRGB:
1519 case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1520 case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
1521 case VK_FORMAT_A8B8G8R8_SINT_PACK32:
1522 case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
1523 c.z = As<Short4>(UnpackHigh(c.x, c.y));
1524 c.x = As<Short4>(UnpackLow(c.x, c.y));
1525 c.y = c.x;
1526 c.w = c.z;
1527 c.x = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.x));
1528 c.y = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.y));
1529 c.z = UnpackLow(As<Byte8>(Short4(0)), As<Byte8>(c.z));
1530 c.w = UnpackHigh(As<Byte8>(Short4(0)), As<Byte8>(c.w));
1531 // Propagate sign bit
1532 if(state.textureFormat == VK_FORMAT_R8G8B8A8_SINT ||
1533 state.textureFormat == VK_FORMAT_A8B8G8R8_SINT_PACK32)
1534 {
1535 c.x >>= 8;
1536 c.y >>= 8;
1537 c.z >>= 8;
1538 c.w >>= 8;
1539 }
1540 break;
1541 case VK_FORMAT_R8G8B8A8_UINT:
1542 case VK_FORMAT_A8B8G8R8_UINT_PACK32:
1543 c.z = As<Short4>(UnpackHigh(c.x, c.y));
1544 c.x = As<Short4>(UnpackLow(c.x, c.y));
1545 c.y = c.x;
1546 c.w = c.z;
1547 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
1548 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
1549 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
1550 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
1551 break;
1552 default:
1553 ASSERT(false);
1554 }
1555 }
1556 break;
1557 case 2:
1558 c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
1559 c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
1560 c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
1561 c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
1562
1563 switch(state.textureFormat)
1564 {
1565 case VK_FORMAT_R8G8_UNORM:
1566 case VK_FORMAT_R8G8_SNORM:
1567 case VK_FORMAT_R8G8_SRGB:
1568 c.y = (c.x & Short4(0xFF00u));
1569 c.x = (c.x << 8);
1570 break;
1571 case VK_FORMAT_R8G8_SINT:
1572 c.y = c.x >> 8;
1573 c.x = (c.x << 8) >> 8; // Propagate sign bit
1574 break;
1575 case VK_FORMAT_R8G8_UINT:
1576 c.y = As<Short4>(As<UShort4>(c.x) >> 8);
1577 c.x &= Short4(0x00FFu);
1578 break;
1579 default:
1580 ASSERT(false);
1581 }
1582 break;
1583 case 1:
1584 {
1585 Int c0 = Int(*Pointer<Byte>(buffer + index[0]));
1586 Int c1 = Int(*Pointer<Byte>(buffer + index[1]));
1587 Int c2 = Int(*Pointer<Byte>(buffer + index[2]));
1588 Int c3 = Int(*Pointer<Byte>(buffer + index[3]));
1589 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1590
1591 switch(state.textureFormat)
1592 {
1593 case VK_FORMAT_R8_SINT:
1594 case VK_FORMAT_R8_UINT:
1595 case VK_FORMAT_S8_UINT:
1596 {
1597 Int zero(0);
1598 c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
1599 // Propagate sign bit
1600 if(state.textureFormat == VK_FORMAT_R8_SINT)
1601 {
1602 c.x = (c.x << 8) >> 8;
1603 }
1604 }
1605 break;
1606 case VK_FORMAT_R8_SNORM:
1607 case VK_FORMAT_R8_UNORM:
1608 case VK_FORMAT_R8_SRGB:
1609 // TODO: avoid populating the low bits at all.
1610 c.x = Unpack(As<Byte4>(c0));
1611 c.x &= Short4(0xFF00u);
1612 break;
1613 default:
1614 c.x = Unpack(As<Byte4>(c0));
1615 break;
1616 }
1617 }
1618 break;
1619 default:
1620 ASSERT(false);
1621 }
1622 }
1623 else if(has16bitTextureComponents())
1624 {
1625 switch(textureComponentCount())
1626 {
1627 case 4:
1628 c.x = Pointer<Short4>(buffer)[index[0]];
1629 c.y = Pointer<Short4>(buffer)[index[1]];
1630 c.z = Pointer<Short4>(buffer)[index[2]];
1631 c.w = Pointer<Short4>(buffer)[index[3]];
1632 transpose4x4(c.x, c.y, c.z, c.w);
1633 break;
1634 case 2:
1635 c.x = *Pointer<Short4>(buffer + 4 * index[0]);
1636 c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer + 4 * index[1])));
1637 c.z = *Pointer<Short4>(buffer + 4 * index[2]);
1638 c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer + 4 * index[3])));
1639 c.y = c.x;
1640 c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
1641 c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
1642 break;
1643 case 1:
1644 c.x = Insert(c.x, Pointer<Short>(buffer)[index[0]], 0);
1645 c.x = Insert(c.x, Pointer<Short>(buffer)[index[1]], 1);
1646 c.x = Insert(c.x, Pointer<Short>(buffer)[index[2]], 2);
1647 c.x = Insert(c.x, Pointer<Short>(buffer)[index[3]], 3);
1648 break;
1649 default:
1650 ASSERT(false);
1651 }
1652 }
1653 else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UNORM_PACK32)
1654 {
1655 Int4 cc;
1656 cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
1657 cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
1658 cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
1659 cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
1660
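// Shift each 10-bit field into the top of a 16-bit lane: R (bits 0..9)
// lands in bits 6..15, so a maximum value of 0x3FF becomes 0xFFC0,
// matching the component scale used for normalization. The 2-bit alpha
// keeps only its two MSBs (0xC000).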
1661 c.x = Short4(cc << 6) & Short4(0xFFC0u);
1662 c.y = Short4(cc >> 4) & Short4(0xFFC0u);
1663 c.z = Short4(cc >> 14) & Short4(0xFFC0u);
1664 c.w = Short4(cc >> 16) & Short4(0xC000u);
1665 }
1666 else if(state.textureFormat == VK_FORMAT_A2R10G10B10_UNORM_PACK32)
1667 {
1668 Int4 cc;
1669 cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
1670 cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
1671 cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
1672 cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
1673
1674 c.x = Short4(cc >> 14) & Short4(0xFFC0u);
1675 c.y = Short4(cc >> 4) & Short4(0xFFC0u);
1676 c.z = Short4(cc << 6) & Short4(0xFFC0u);
1677 c.w = Short4(cc >> 16) & Short4(0xC000u);
1678 }
1679 else if(state.textureFormat == VK_FORMAT_A2B10G10R10_UINT_PACK32)
1680 {
1681 Int4 cc;
1682 cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
1683 cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
1684 cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
1685 cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
1686
1687 c.x = Short4(cc & Int4(0x3FF));
1688 c.y = Short4((cc >> 10) & Int4(0x3FF));
1689 c.z = Short4((cc >> 20) & Int4(0x3FF));
1690 c.w = Short4((cc >> 30) & Int4(0x3));
1691 }
1692 else if(state.textureFormat == VK_FORMAT_A2R10G10B10_UINT_PACK32)
1693 {
1694 Int4 cc;
1695 cc = Insert(cc, Pointer<Int>(buffer)[index[0]], 0);
1696 cc = Insert(cc, Pointer<Int>(buffer)[index[1]], 1);
1697 cc = Insert(cc, Pointer<Int>(buffer)[index[2]], 2);
1698 cc = Insert(cc, Pointer<Int>(buffer)[index[3]], 3);
1699
1700 c.z = Short4((cc & Int4(0x3FF)));
1701 c.y = Short4(((cc >> 10) & Int4(0x3FF)));
1702 c.x = Short4(((cc >> 20) & Int4(0x3FF)));
1703 c.w = Short4(((cc >> 30) & Int4(0x3)));
1704 }
1705 else
1706 ASSERT(false);
1707
1708 if(state.textureFormat.isSRGBformat())
1709 {
1710 for(int i = 0; i < textureComponentCount(); i++)
1711 {
1712 if(isRGBComponent(i))
1713 {
1714 // The current table-based sRGB conversion requires 0xFF00 to represent 1.0.
1715 ASSERT(state.textureFormat.has8bitTextureComponents());
1716
1717 sRGBtoLinearFF00(c[i]);
1718 }
1719 }
1720 }
1721
1722 return c;
1723 }
1724
1725 Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, const Short4 &layerIndex, Vector4i &offset, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function)
1726 {
1727 Vector4s c;
1728
1729 UInt index[4];
1730 computeIndices(index, uuuu, vvvv, wwww, layerIndex, offset, sample, mipmap, function);
1731
1732 if(isYcbcrFormat())
1733 {
1734 // Pointers to the planes of YCbCr images are stored in consecutive mipmap levels.
1735 Pointer<Byte> bufferY = buffer; // *Pointer<Pointer<Byte>>(mipmap + 0 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
1736 Pointer<Byte> bufferU = *Pointer<Pointer<Byte>>(mipmap + 1 * sizeof(Mipmap) + OFFSET(Mipmap, buffer)); // U/V for 2-plane interleaved formats.
1737 Pointer<Byte> bufferV = *Pointer<Pointer<Byte>>(mipmap + 2 * sizeof(Mipmap) + OFFSET(Mipmap, buffer));
1738
1739 // Luminance
1740 Int c0 = Int(bufferY[index[0]]);
1741 Int c1 = Int(bufferY[index[1]]);
1742 Int c2 = Int(bufferY[index[2]]);
1743 Int c3 = Int(bufferY[index[3]]);
1744 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1745 UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
1746
1747 UShort4 Cb, Cr;
1748
1749 // Chroma
1750 {
1751 computeIndices(index, uuuu, vvvv, wwww, layerIndex, offset, sample, mipmap + sizeof(Mipmap), function);
1752 UShort4 U, V;
1753
1754 if(state.textureFormat == VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM)
1755 {
1756 c0 = Int(bufferU[index[0]]);
1757 c1 = Int(bufferU[index[1]]);
1758 c2 = Int(bufferU[index[2]]);
1759 c3 = Int(bufferU[index[3]]);
1760 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1761 U = As<UShort4>(Unpack(As<Byte4>(c0)));
1762
1763 c0 = Int(bufferV[index[0]]);
1764 c1 = Int(bufferV[index[1]]);
1765 c2 = Int(bufferV[index[2]]);
1766 c3 = Int(bufferV[index[3]]);
1767 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1768 V = As<UShort4>(Unpack(As<Byte4>(c0)));
1769 }
1770 else if(state.textureFormat == VK_FORMAT_G8_B8R8_2PLANE_420_UNORM)
1771 {
1772 Short4 UV;
1773 UV = Insert(UV, Pointer<Short>(bufferU)[index[0]], 0); // TODO: Insert(UShort4, UShort)
1774 UV = Insert(UV, Pointer<Short>(bufferU)[index[1]], 1);
1775 UV = Insert(UV, Pointer<Short>(bufferU)[index[2]], 2);
1776 UV = Insert(UV, Pointer<Short>(bufferU)[index[3]], 3);
1777 U = (UV & Short4(0x00FFu)) | (UV << 8);
1778 V = (UV & Short4(0xFF00u)) | As<Short4>(As<UShort4>(UV) >> 8);
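// Replicate the U (low byte) and V (high byte) of each 16-bit pair into
// both halves of their lane, producing the same 8.8 fixed-point encoding
// that Unpack() yields in the 3-plane case above.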
1779 }
1780 else
1781 UNSUPPORTED("state.textureFormat %d", (int)state.textureFormat);
1782
1783 if(!state.swappedChroma)
1784 {
1785 Cb = U;
1786 Cr = V;
1787 }
1788 else
1789 {
1790 Cb = V;
1791 Cr = U;
1792 }
1793 }
1794
1795 if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
1796 {
1797 // YCbCr formats are treated as signed 15-bit.
1798 c.x = Cr >> 1;
1799 c.y = Y >> 1;
1800 c.z = Cb >> 1;
1801 }
1802 else
1803 {
1804 // Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
1805 // Scale down by 0x0101 to normalize the 8.8 samples, and up by 0x7FFF for signed 15-bit output.
1806 float yOffset = static_cast<float>(state.studioSwing ? 16 * 0x0101 : 0);
1807 float uvOffset = static_cast<float>(128 * 0x0101);
1808 float yFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 219 * 0x0101 : 255 * 0x0101);
1809 float uvFactor = static_cast<float>(0x7FFF) / static_cast<float>(state.studioSwing ? 224 * 0x0101 : 255 * 0x0101);
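// E.g. with studio swing, Y = 16 (0x1010 in 8.8 fixed point) maps to 0
// and Y = 235 (0xEBEB) maps to 0x7FFF.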
1810
1811 Float4 y = (Float4(Y) - Float4(yOffset)) * Float4(yFactor);
1812 Float4 u = (Float4(Cb) - Float4(uvOffset)) * Float4(uvFactor);
1813 Float4 v = (Float4(Cr) - Float4(uvOffset)) * Float4(uvFactor);
1814
1815 if(state.ycbcrModel == VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
1816 {
1817 c.x = Short4(v);
1818 c.y = Short4(y);
1819 c.z = Short4(u);
1820 }
1821 else
1822 {
1823 // Generic YCbCr to RGB transformation:
1824 // R = Y + 2 * (1 - Kr) * Cr
1825 // G = Y - 2 * Kb * (1 - Kb) / Kg * Cb - 2 * Kr * (1 - Kr) / Kg * Cr
1826 // B = Y + 2 * (1 - Kb) * Cb
1827
1828 float Kb = 0.114f;
1829 float Kr = 0.299f;
1830
1831 switch(state.ycbcrModel)
1832 {
1833 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
1834 Kb = 0.0722f;
1835 Kr = 0.2126f;
1836 break;
1837 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
1838 Kb = 0.114f;
1839 Kr = 0.299f;
1840 break;
1841 case VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
1842 Kb = 0.0593f;
1843 Kr = 0.2627f;
1844 break;
1845 default:
1846 UNSUPPORTED("ycbcrModel %d", int(state.ycbcrModel));
1847 }
1848
1849 const float Kg = 1.0f - Kr - Kb;
1850
1851 const float Rr = 2 * (1 - Kr);
1852 const float Gb = -2 * Kb * (1 - Kb) / Kg;
1853 const float Gr = -2 * Kr * (1 - Kr) / Kg;
1854 const float Bb = 2 * (1 - Kb);
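// For BT.601 (Kb = 0.114, Kr = 0.299) this yields the familiar
// R = Y + 1.402 Cr, G = Y - 0.344 Cb - 0.714 Cr, B = Y + 1.772 Cb.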
1855
1856 Float4 r = y + Float4(Rr) * v;
1857 Float4 g = y + Float4(Gb) * u + Float4(Gr) * v;
1858 Float4 b = y + Float4(Bb) * u;
1859
1860 c.x = Short4(r);
1861 c.y = Short4(g);
1862 c.z = Short4(b);
1863 }
1864 }
1865 }
1866 else
1867 {
1868 return sampleTexel(index, buffer);
1869 }
1870
1871 return c;
1872 }
1873
1874 Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &dRef, const Int4 &sample, Pointer<Byte> &mipmap, Pointer<Byte> buffer, SamplerFunction function)
1875 {
1876 Int4 valid;
1877
1878 if(borderModeActive())
1879 {
1880 // Valid texels have non-negative coordinates; out-of-range border texels were marked with -1.
1881 Int4 negative = uuuu;
1882 if(state.is2D() || state.is3D() || state.isCube()) negative |= vvvv;
1883 if(state.is3D() || state.isCube() || state.isArrayed()) negative |= wwww;
1884 valid = CmpNLT(negative, Int4(0));
1885 }
1886
1887 UInt index[4];
1888 computeIndices(index, uuuu, vvvv, wwww, sample, valid, mipmap, function);
1889
1890 Vector4f c;
1891
1892 if(hasFloatTexture() || has32bitIntegerTextureComponents())
1893 {
1894 UInt4 t0, t1, t2, t3;
1895
1896 switch(state.textureFormat)
1897 {
1898 case VK_FORMAT_R16_SFLOAT:
1899 t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 2));
1900 t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 2));
1901 t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 2));
1902 t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 2));
1903
1904 c.x.x = Extract(As<Float4>(halfToFloatBits(t0)), 0);
1905 c.x.y = Extract(As<Float4>(halfToFloatBits(t1)), 0);
1906 c.x.z = Extract(As<Float4>(halfToFloatBits(t2)), 0);
1907 c.x.w = Extract(As<Float4>(halfToFloatBits(t3)), 0);
1908 break;
1909 case VK_FORMAT_R16G16_SFLOAT:
1910 t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 4));
1911 t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 4));
1912 t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 4));
1913 t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 4));
1914
1915 // FIXME: shuffles
1916 c.x = As<Float4>(halfToFloatBits(t0));
1917 c.y = As<Float4>(halfToFloatBits(t1));
1918 c.z = As<Float4>(halfToFloatBits(t2));
1919 c.w = As<Float4>(halfToFloatBits(t3));
1920 transpose4x4(c.x, c.y, c.z, c.w);
1921 break;
1922 case VK_FORMAT_R16G16B16A16_SFLOAT:
1923 t0 = Int4(*Pointer<UShort4>(buffer + index[0] * 8));
1924 t1 = Int4(*Pointer<UShort4>(buffer + index[1] * 8));
1925 t2 = Int4(*Pointer<UShort4>(buffer + index[2] * 8));
1926 t3 = Int4(*Pointer<UShort4>(buffer + index[3] * 8));
1927
1928 c.x = As<Float4>(halfToFloatBits(t0));
1929 c.y = As<Float4>(halfToFloatBits(t1));
1930 c.z = As<Float4>(halfToFloatBits(t2));
1931 c.w = As<Float4>(halfToFloatBits(t3));
1932 transpose4x4(c.x, c.y, c.z, c.w);
1933 break;
1934 case VK_FORMAT_R32_SFLOAT:
1935 case VK_FORMAT_R32_SINT:
1936 case VK_FORMAT_R32_UINT:
1937 case VK_FORMAT_D32_SFLOAT:
1938 // FIXME: Optimal shuffling?
1939 c.x.x = *Pointer<Float>(buffer + index[0] * 4);
1940 c.x.y = *Pointer<Float>(buffer + index[1] * 4);
1941 c.x.z = *Pointer<Float>(buffer + index[2] * 4);
1942 c.x.w = *Pointer<Float>(buffer + index[3] * 4);
1943 break;
1944 case VK_FORMAT_R32G32_SFLOAT:
1945 case VK_FORMAT_R32G32_SINT:
1946 case VK_FORMAT_R32G32_UINT:
1947 // FIXME: Optimal shuffling?
1948 c.x.xy = *Pointer<Float4>(buffer + index[0] * 8);
1949 c.x.zw = *Pointer<Float4>(buffer + index[1] * 8 - 8);
1950 c.z.xy = *Pointer<Float4>(buffer + index[2] * 8);
1951 c.z.zw = *Pointer<Float4>(buffer + index[3] * 8 - 8);
1952 c.y = c.x;
1953 c.x = Float4(c.x.xz, c.z.xz);
1954 c.y = Float4(c.y.yw, c.z.yw);
1955 break;
1956 case VK_FORMAT_R32G32B32A32_SFLOAT:
1957 case VK_FORMAT_R32G32B32A32_SINT:
1958 case VK_FORMAT_R32G32B32A32_UINT:
1959 c.x = *Pointer<Float4>(buffer + index[0] * 16, 16);
1960 c.y = *Pointer<Float4>(buffer + index[1] * 16, 16);
1961 c.z = *Pointer<Float4>(buffer + index[2] * 16, 16);
1962 c.w = *Pointer<Float4>(buffer + index[3] * 16, 16);
1963 transpose4x4(c.x, c.y, c.z, c.w);
1964 break;
1965 case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
1966 {
1967 Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
1968 t.x = *Pointer<Float>(buffer + index[0] * 4);
1969 t.y = *Pointer<Float>(buffer + index[1] * 4);
1970 t.z = *Pointer<Float>(buffer + index[2] * 4);
1971 t.w = *Pointer<Float>(buffer + index[3] * 4);
1972 t0 = As<UInt4>(t);
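// Shared-exponent decode: c.w temporarily holds the common scale
// 2^(exp - 24), i.e. 2^(exp - bias - mantissaBits) with bias 15 and
// 9 mantissa bits, by which each 9-bit mantissa is multiplied.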
1973 c.w = Float4(UInt4(1) << ((t0 >> 27) & UInt4(0x1F))) * Float4(1.0f / (1 << 24));
1974 c.x = Float4(t0 & UInt4(0x1FF)) * c.w;
1975 c.y = Float4((t0 >> 9) & UInt4(0x1FF)) * c.w;
1976 c.z = Float4((t0 >> 18) & UInt4(0x1FF)) * c.w;
1977 }
1978 break;
1979 case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
1980 {
1981 Float4 t; // TODO: add Insert(UInt4, RValue<UInt>)
1982 t.x = *Pointer<Float>(buffer + index[0] * 4);
1983 t.y = *Pointer<Float>(buffer + index[1] * 4);
1984 t.z = *Pointer<Float>(buffer + index[2] * 4);
1985 t.w = *Pointer<Float>(buffer + index[3] * 4);
1986 t0 = As<UInt4>(t);
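// The 11-bit (5e6m) and 10-bit (5e5m) floats use the same 5-bit exponent
// encoding and bias as 16-bit half floats, so aligning their bits with a
// half's exponent/mantissa fields produces values halfToFloatBits() can
// convert directly.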
1987 c.x = As<Float4>(halfToFloatBits((t0 << 4) & UInt4(0x7FF0)));
1988 c.y = As<Float4>(halfToFloatBits((t0 >> 7) & UInt4(0x7FF0)));
1989 c.z = As<Float4>(halfToFloatBits((t0 >> 17) & UInt4(0x7FE0)));
1990 }
1991 break;
1992 default:
1993 UNSUPPORTED("Format %d", VkFormat(state.textureFormat));
1994 }
1995 }
1996 else
1997 {
1998 ASSERT(!isYcbcrFormat());
1999
2000 Vector4s cs = sampleTexel(index, buffer);
2001
2002 bool isInteger = state.textureFormat.isUnnormalizedInteger();
2003 int componentCount = textureComponentCount();
2004 for(int n = 0; n < componentCount; n++)
2005 {
2006 if(hasUnsignedTextureComponent(n))
2007 {
2008 if(isInteger)
2009 {
2010 c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
2011 }
2012 else
2013 {
2014 c[n] = Float4(As<UShort4>(cs[n]));
2015 }
2016 }
2017 else
2018 {
2019 if(isInteger)
2020 {
2021 c[n] = As<Float4>(Int4(cs[n]));
2022 }
2023 else
2024 {
2025 c[n] = Float4(cs[n]);
2026 }
2027 }
2028 }
2029 }
2030
2031 if(state.compareEnable)
2032 {
2033 Float4 ref = dRef;
2034
2035 if(!hasFloatTexture())
2036 {
2037 // D16_UNORM: clamp reference, normalize texel value
2038 ref = Min(Max(ref, Float4(0.0f)), Float4(1.0f));
2039 c.x = c.x * Float4(1.0f / 0xFFFF);
2040 }
2041
2042 Int4 boolean;
2043
2044 switch(state.compareOp)
2045 {
2046 case VK_COMPARE_OP_LESS_OR_EQUAL: boolean = CmpLE(ref, c.x); break;
2047 case VK_COMPARE_OP_GREATER_OR_EQUAL: boolean = CmpNLT(ref, c.x); break;
2048 case VK_COMPARE_OP_LESS: boolean = CmpLT(ref, c.x); break;
2049 case VK_COMPARE_OP_GREATER: boolean = CmpNLE(ref, c.x); break;
2050 case VK_COMPARE_OP_EQUAL: boolean = CmpEQ(ref, c.x); break;
2051 case VK_COMPARE_OP_NOT_EQUAL: boolean = CmpNEQ(ref, c.x); break;
2052 case VK_COMPARE_OP_ALWAYS: boolean = Int4(-1); break;
2053 case VK_COMPARE_OP_NEVER: boolean = Int4(0); break;
2054 default: ASSERT(false);
2055 }
2056
2057 c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f)));
2058 c.y = Float4(0.0f);
2059 c.z = Float4(0.0f);
2060 c.w = Float4(1.0f);
2061 }
2062
2063 if(borderModeActive())
2064 {
2065 c = replaceBorderTexel(c, valid);
2066 }
2067
2068 return c;
2069 }
2070
2071 Vector4f SamplerCore::replaceBorderTexel(const Vector4f &c, Int4 valid)
2072 {
2073 Vector4i border;
2074
2075 const bool scaled = !hasFloatTexture() && !hasUnnormalizedIntegerTexture() && !state.compareEnable;
2076 const sw::float4 scaleComp = scaled ? getComponentScale() : sw::float4(1.0f, 1.0f, 1.0f, 1.0f);
2077
2078 switch(state.border)
2079 {
2080 case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
2081 case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
2082 border.x = Int4(0);
2083 border.y = Int4(0);
2084 border.z = Int4(0);
2085 border.w = Int4(0);
2086 break;
2087 case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
2088 border.x = Int4(0);
2089 border.y = Int4(0);
2090 border.z = Int4(0);
2091 border.w = Int4(bit_cast<int>(scaleComp.w));
2092 break;
2093 case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
2094 border.x = Int4(0);
2095 border.y = Int4(0);
2096 border.z = Int4(0);
2097 border.w = Int4(1);
2098 break;
2099 case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
2100 border.x = Int4(bit_cast<int>(scaleComp.x));
2101 border.y = Int4(bit_cast<int>(scaleComp.y));
2102 border.z = Int4(bit_cast<int>(scaleComp.z));
2103 border.w = Int4(bit_cast<int>(scaleComp.w));
2104 break;
2105 case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
2106 border.x = Int4(1);
2107 border.y = Int4(1);
2108 border.z = Int4(1);
2109 border.w = Int4(1);
2110 break;
2111 case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
2112 // This bit-casts from float to int in C++ code instead of Reactor code
2113 // because Reactor does not guarantee preserving infinity (b/140302841).
2114 border.x = Int4(bit_cast<int>(scaleComp.x * state.customBorder.float32[0]));
2115 border.y = Int4(bit_cast<int>(scaleComp.y * state.customBorder.float32[1]));
2116 border.z = Int4(bit_cast<int>(scaleComp.z * state.customBorder.float32[2]));
2117 border.w = Int4(bit_cast<int>(scaleComp.w * state.customBorder.float32[3]));
2118 break;
2119 case VK_BORDER_COLOR_INT_CUSTOM_EXT:
2120 border.x = Int4(state.customBorder.int32[0]);
2121 border.y = Int4(state.customBorder.int32[1]);
2122 border.z = Int4(state.customBorder.int32[2]);
2123 border.w = Int4(state.customBorder.int32[3]);
2124 break;
2125 default:
2126 UNSUPPORTED("sint/uint/sfloat border: %u", state.border);
2127 }
2128
2129 Vector4f out;
2130 out.x = As<Float4>((valid & As<Int4>(c.x)) | (~valid & border.x)); // TODO: IfThenElse()
2131 out.y = As<Float4>((valid & As<Int4>(c.y)) | (~valid & border.y));
2132 out.z = As<Float4>((valid & As<Int4>(c.z)) | (~valid & border.z));
2133 out.w = As<Float4>((valid & As<Int4>(c.w)) | (~valid & border.w));
2134
2135 return out;
2136 }
2137
2138 void SamplerCore::selectMipmap(const Pointer<Byte> &texture, Pointer<Byte> &mipmap, Pointer<Byte> &buffer, const Float &lod, bool secondLOD)
2139 {
2140 Pointer<Byte> mipmap0 = texture + OFFSET(Texture, mipmap[0]);
2141
2142 if(state.mipmapFilter == MIPMAP_NONE)
2143 {
2144 mipmap = mipmap0;
2145 }
2146 else
2147 {
2148 Int ilod;
2149
2150 if(state.mipmapFilter == MIPMAP_POINT)
2151 {
2152 // TODO: Preferred formula is ceil(lod + 0.5) - 1
2153 ilod = RoundInt(lod);
2154 }
2155 else // MIPMAP_LINEAR
2156 {
2157 ilod = Int(lod);
2158 }
2159
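// When blending two mip levels, this is evaluated once per level;
// secondLOD advances to the next Mipmap entry by adding one more stride.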
2160 mipmap = mipmap0 + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
2161 }
2162
2163 buffer = *Pointer<Pointer<Byte>>(mipmap + OFFSET(Mipmap, buffer));
2164 }
2165
2166 Int4 SamplerCore::computeFilterOffset(Float &lod)
2167 {
2168 if(state.textureFilter == FILTER_POINT)
2169 {
2170 return Int4(0);
2171 }
2172 else if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
2173 {
2174 return CmpNLE(Float4(lod), Float4(0.0f));
2175 }
2176 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
2177 {
2178 return CmpLE(Float4(lod), Float4(0.0f));
2179 }
2180
2181 return Int4(~0);
2182 }
2183
2184 Short4 SamplerCore::address(const Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap)
2185 {
2186 if(addressingMode == ADDRESSING_UNUSED)
2187 {
2188 return Short4(0); // TODO(b/134669567): Optimize for 1D filtering
2189 }
2190 else if(addressingMode == ADDRESSING_CLAMP || addressingMode == ADDRESSING_BORDER)
2191 {
2192 Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f));
2193
2194 return Short4(Int4(clamp * Float4(1 << 16)));
2195 }
2196 else if(addressingMode == ADDRESSING_MIRROR)
2197 {
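// In 16.16 fixed point, bit 16 of the scaled coordinate selects between
// even and odd mirror periods. Arithmetic-shifting it into a 0/-1 mask
// and XORing reflects the fractional bits (~f acts as 1 - f in 0.16
// fixed point).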
2198 Int4 convert = Int4(uw * Float4(1 << 16));
2199 Int4 mirror = (convert << 15) >> 31;
2200
2201 convert ^= mirror;
2202
2203 return Short4(convert);
2204 }
2205 else if(addressingMode == ADDRESSING_MIRRORONCE)
2206 {
2207 // Absolute value
2208 Int4 convert = Int4(Abs(uw * Float4(1 << 16)));
2209
2210 // Clamp
2211 convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
2212 convert = As<Int4>(PackSigned(convert, convert));
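// PackSigned saturates the biased coordinate to [-0x8000, 0x7FFF];
// adding 0x8000 back yields a value clamped to [0, 0xFFFF], i.e.
// [0.0, 1.0) in 0.16 fixed point.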
2213
2214 return As<Short4>(Int2(convert)) + Short4(0x8000u);
2215 }
2216 else // Wrap
2217 {
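// The conversion to Short4 keeps only the low 16 bits of the 16.16
// coordinate, i.e. its fractional part, which implements wrapping.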
2218 return Short4(Int4(uw * Float4(1 << 16)));
2219 }
2220 }
2221
2222 Short4 SamplerCore::computeLayerIndex(const Float4 &a, Pointer<Byte> &mipmap)
2223 {
2224 if(!state.isArrayed())
2225 {
2226 return {};
2227 }
2228
2229 Int4 layers = *Pointer<Int4>(mipmap + OFFSET(Mipmap, depth));
2230
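// Vulkan selects the array layer by rounding the coordinate to the
// nearest integer and clamping to [0, layerCount - 1].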
2231 return Short4(Min(Max(RoundInt(a), Int4(0)), layers - Int4(1)));
2232 }
2233
2234 // TODO: Eliminate when the gather + mirror addressing case is handled by mirroring the footprint.
2235 static Int4 mirror(Int4 n)
2236 {
2237 auto positive = CmpNLT(n, Int4(0));
2238 return (positive & n) | (~positive & (-(Int4(1) + n)));
2239 }
2240
2241 static Int4 mod(Int4 n, Int4 d)
2242 {
2243 auto x = n % d;
2244 auto positive = CmpNLT(x, Int4(0));
2245 return (positive & x) | (~positive & (x + d));
2246 }
2247
2248 void SamplerCore::address(const Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Int4 &offset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
2249 {
2250 if(addressingMode == ADDRESSING_UNUSED)
2251 {
2252 f = Float4(0.0f); // TODO(b/134669567): Optimize for 1D filtering
2253 return;
2254 }
2255
2256 Int4 dim = *Pointer<Int4>(mipmap + whd, 16);
2257 Int4 maxXYZ = dim - Int4(1);
2258
2259 if(function == Fetch) // Unnormalized coordinates
2260 {
2261 Int4 xyz = function.offset ? As<Int4>(uvw) + offset : As<Int4>(uvw);
2262 xyz0 = Min(Max(xyz, Int4(0)), maxXYZ);
2263
2264 // VK_EXT_image_robustness requires checking for out-of-bounds accesses.
2265 // TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
2266 // If the above clamping altered the result, the access is out-of-bounds.
2267 // In that case set the coordinate to -1 to perform texel replacement later.
2268 Int4 outOfBounds = CmpNEQ(xyz, xyz0);
2269 xyz0 |= outOfBounds;
2270 }
2271 else if(addressingMode == ADDRESSING_CUBEFACE)
2272 {
2273 xyz0 = As<Int4>(uvw);
2274 }
2275 else
2276 {
2277 const int halfBits = 0x3EFFFFFF; // Value just under 0.5f
2278 const int oneBits = 0x3F7FFFFF; // Value just under 1.0f
2279 const int twoBits = 0x3FFFFFFF; // Value just under 2.0f
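// Using values just below 0.5/1.0/2.0 keeps the normalized coordinate
// strictly below 1.0, so coord * dim stays below dim and the truncated
// integer coordinate remains in range.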
2280
2281 Float4 coord = uvw;
2282
2283 if(state.unnormalizedCoordinates)
2284 {
2285 switch(addressingMode)
2286 {
2287 case ADDRESSING_CLAMP:
2288 coord = Min(Max(coord, Float4(0.0f)), Float4(dim) * As<Float4>(Int4(oneBits)));
2289 break;
2290 case ADDRESSING_BORDER:
2291 // Don't map to a valid range here.
2292 break;
2293 default:
2294 // "If unnormalizedCoordinates is VK_TRUE, addressModeU and addressModeV must each be
2295 // either VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE or VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER"
2296 UNREACHABLE("addressingMode %d", int(addressingMode));
2297 break;
2298 }
2299 }
2300 else if(state.textureFilter == FILTER_GATHER && addressingMode == ADDRESSING_MIRROR)
2301 {
2302 // Gather requires the 'footprint' of the four texels from which components are taken to also mirror around.
2303 // Therefore we can't just compute one texel's location and find the other ones at +1 offsets from it.
2304 // Here we handle that case separately by performing the mirroring per texel coordinate.
2305 // TODO: Mirror the footprint by adjusting the sign of the 0.5f and 1 offsets.
2306
2307 coord = coord * Float4(dim);
2308 coord -= Float4(0.5f);
2309 Float4 floor = Floor(coord);
2310 xyz0 = Int4(floor);
2311
2312 if(function.offset)
2313 {
2314 xyz0 += offset;
2315 }
2316
2317 xyz1 = xyz0 + Int4(1);
2318
2319 xyz0 = maxXYZ - mirror(mod(xyz0, Int4(2) * dim) - dim);
2320 xyz1 = maxXYZ - mirror(mod(xyz1, Int4(2) * dim) - dim);
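// mod() folds the coordinate into a single [0, 2*dim) period, the -dim
// bias and mirror() turn it into a triangle wave over [0, dim), and
// subtracting from maxXYZ restores the original orientation.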
2321
2322 return;
2323 }
2324 else
2325 {
2326 if(!function.offset)
2327 {
2328 switch(addressingMode)
2329 {
2330 case ADDRESSING_CLAMP:
2331 case ADDRESSING_SEAMLESS:
2332 // While cube face coordinates are nominally already in the [0.0, 1.0] range
2333 // due to the projection, and numerical imprecision is tolerated due to the
2334 // border of pixels for seamless filtering, the projection doesn't cause
2335 // range normalization for Inf and NaN values. So we always clamp.
2336 {
2337 Float4 one = As<Float4>(Int4(oneBits));
2338 coord = Min(Max(coord, Float4(0.0f)), one);
2339 }
2340 break;
2341 case ADDRESSING_MIRROR:
2342 {
2343 Float4 half = As<Float4>(Int4(halfBits));
2344 Float4 one = As<Float4>(Int4(oneBits));
2345 Float4 two = As<Float4>(Int4(twoBits));
2346 coord = one - Abs(two * Frac(coord * half) - one);
2347 }
2348 break;
2349 case ADDRESSING_MIRRORONCE:
2350 {
2351 Float4 half = As<Float4>(Int4(halfBits));
2352 Float4 one = As<Float4>(Int4(oneBits));
2353 Float4 two = As<Float4>(Int4(twoBits));
2354 coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
2355 }
2356 break;
2357 case ADDRESSING_BORDER:
2358 // Don't map to a valid range here.
2359 break;
2360 default: // Wrap
2361 coord = Frac(coord);
2362 break;
2363 }
2364 }
2365
2366 coord = coord * Float4(dim);
2367 }
2368
2369 if(state.textureFilter == FILTER_POINT)
2370 {
2371 if(addressingMode == ADDRESSING_BORDER || function.offset)
2372 {
2373 xyz0 = Int4(Floor(coord));
2374 }
2375 else // Can't have negative coordinates, so floor() is redundant when casting to int.
2376 {
2377 xyz0 = Int4(coord);
2378 }
2379 }
2380 else
2381 {
2382 if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
2383 state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
2384 {
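// Subtract the 0.5 texel-center offset only in the lanes where the
// linear filter is active (filter is all-ones there, zero otherwise).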
2385 coord -= As<Float4>(As<Int4>(Float4(0.5f)) & filter);
2386 }
2387 else
2388 {
2389 coord -= Float4(0.5f);
2390 }
2391
2392 Float4 floor = Floor(coord);
2393 xyz0 = Int4(floor);
2394 f = coord - floor;
2395 }
2396
2397 if(function.offset)
2398 {
2399 xyz0 += offset;
2400 }
2401
2402 if(addressingMode == ADDRESSING_SEAMLESS) // Adjust for border.
2403 {
2404 xyz0 += Int4(1);
2405 }
2406
2407 xyz1 = xyz0 - filter; // filter is all-ones (-1) when the footprint spans two texels, making xyz1 = xyz0 + 1; with point filtering it stays xyz0.
2408
2409 if(addressingMode == ADDRESSING_BORDER)
2410 {
2411 // Replace the coordinates with -1 if they're out of range.
2412 Int4 border0 = CmpLT(xyz0, Int4(0)) | CmpNLT(xyz0, dim);
2413 Int4 border1 = CmpLT(xyz1, Int4(0)) | CmpNLT(xyz1, dim);
2414 xyz0 |= border0;
2415 xyz1 |= border1;
2416 }
2417 else if(function.offset)
2418 {
2419 switch(addressingMode)
2420 {
2421 case ADDRESSING_SEAMLESS:
2422 UNREACHABLE("addressingMode %d", int(addressingMode)); // Cube sampling doesn't support offset.
2423 case ADDRESSING_MIRROR:
2424 case ADDRESSING_MIRRORONCE:
2425 // TODO: Implement ADDRESSING_MIRROR and ADDRESSING_MIRRORONCE.
2426 // Fall through to Clamp.
2427 case ADDRESSING_CLAMP:
2428 xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
2429 xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
2430 break;
2431 default: // Wrap
2432 xyz0 = mod(xyz0, dim);
2433 xyz1 = mod(xyz1, dim);
2434 break;
2435 }
2436 }
2437 else if(state.textureFilter != FILTER_POINT)
2438 {
2439 switch(addressingMode)
2440 {
2441 case ADDRESSING_SEAMLESS:
2442 break;
2443 case ADDRESSING_MIRROR:
2444 case ADDRESSING_MIRRORONCE:
2445 case ADDRESSING_CLAMP:
2446 xyz0 = Max(xyz0, Int4(0));
2447 xyz1 = Min(xyz1, maxXYZ);
2448 break;
2449 default: // Wrap
2450 {
2451 Int4 under = CmpLT(xyz0, Int4(0));
2452 xyz0 = (under & maxXYZ) | (~under & xyz0); // xyz < 0 ? dim - 1 : xyz // TODO: IfThenElse()
2453
2454 Int4 nover = CmpLT(xyz1, dim);
2455 xyz1 = nover & xyz1; // xyz >= dim ? 0 : xyz
2456 }
2457 break;
2458 }
2459 }
2460 }
2461 }
2462
2463 Int4 SamplerCore::computeLayerIndex(const Float4 &a, Pointer<Byte> &mipmap, SamplerFunction function)
2464 {
2465 if(!state.isArrayed())
2466 {
2467 return {};
2468 }
2469
2470 Int4 layers = *Pointer<Int4>(mipmap + OFFSET(Mipmap, depth), 16);
2471 Int4 maxLayer = layers - Int4(1);
2472
2473 if(function == Fetch) // Unnormalized coordinates
2474 {
2475 Int4 xyz = As<Int4>(a);
2476 Int4 xyz0 = Min(Max(xyz, Int4(0)), maxLayer);
2477
2478 // VK_EXT_image_robustness requires checking for out-of-bounds accesses.
2479 // TODO(b/162327166): Only perform bounds checks when VK_EXT_image_robustness is enabled.
2480 // If the above clamping altered the result, the access is out-of-bounds.
2481 // In that case set the coordinate to -1 to perform texel replacement later.
2482 Int4 outOfBounds = CmpNEQ(xyz, xyz0);
2483 xyz0 |= outOfBounds;
2484
2485 return xyz0;
2486 }
2487 else
2488 {
2489 return Min(Max(RoundInt(a), Int4(0)), maxLayer);
2490 }
2491 }
2492
2493 void SamplerCore::sRGBtoLinearFF00(Short4 &c)
2494 {
2495 c = As<UShort4>(c) >> 8;
2496
2497 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants, sRGBtoLinearFF_FF00));
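// Each 8-bit sRGB index selects a 16-bit linear value from a 256-entry
// table scaled so that 1.0 is represented as 0xFF00.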
2498
2499 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
2500 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
2501 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
2502 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
2503 }
2504
2505 bool SamplerCore::hasFloatTexture() const
2506 {
2507 return state.textureFormat.isFloatFormat();
2508 }
2509
2510 bool SamplerCore::hasUnnormalizedIntegerTexture() const
2511 {
2512 return state.textureFormat.isUnnormalizedInteger();
2513 }
2514
2515 bool SamplerCore::hasUnsignedTextureComponent(int component) const
2516 {
2517 return state.textureFormat.isUnsignedComponent(component);
2518 }
2519
2520 int SamplerCore::textureComponentCount() const
2521 {
2522 return state.textureFormat.componentCount();
2523 }
2524
2525 bool SamplerCore::has16bitPackedTextureFormat() const
2526 {
2527 return state.textureFormat.has16bitPackedTextureFormat();
2528 }
2529
2530 bool SamplerCore::has8bitTextureComponents() const
2531 {
2532 return state.textureFormat.has8bitTextureComponents();
2533 }
2534
2535 bool SamplerCore::has16bitTextureComponents() const
2536 {
2537 return state.textureFormat.has16bitTextureComponents();
2538 }
2539
2540 bool SamplerCore::has32bitIntegerTextureComponents() const
2541 {
2542 return state.textureFormat.has32bitIntegerTextureComponents();
2543 }
2544
2545 bool SamplerCore::isYcbcrFormat() const
2546 {
2547 return state.textureFormat.isYcbcrFormat();
2548 }
2549
2550 bool SamplerCore::isRGBComponent(int component) const
2551 {
2552 return state.textureFormat.isRGBComponent(component);
2553 }
2554
2555 bool SamplerCore::borderModeActive() const
2556 {
2557 return state.addressingModeU == ADDRESSING_BORDER ||
2558 state.addressingModeV == ADDRESSING_BORDER ||
2559 state.addressingModeW == ADDRESSING_BORDER;
2560 }
2561
2562 VkComponentSwizzle SamplerCore::gatherSwizzle() const
2563 {
2564 switch(state.gatherComponent)
2565 {
2566 case 0: return state.swizzle.r;
2567 case 1: return state.swizzle.g;
2568 case 2: return state.swizzle.b;
2569 case 3: return state.swizzle.a;
2570 default:
2571 UNREACHABLE("Invalid component");
2572 return VK_COMPONENT_SWIZZLE_R;
2573 }
2574 }
2575
2576 sw::float4 SamplerCore::getComponentScale() const
2577 {
2578 // TODO(b/204709464): Unlike that of other formats, the fixed-point representation of the formats below is handled with bit extension.
2579 // This special handling of such formats should be removed later.
2580 switch(state.textureFormat)
2581 {
2582 case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
2583 case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
2584 return sw::float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF);
2585 default:
2586 break;
2587 }
2588
2589 const sw::float4 scale = state.textureFormat.getScale();
2590 const sw::int4 bits = state.textureFormat.bitsPerComponent();
2591 const sw::int4 shift = sw::int4(std::max(16 - bits.x, 0), std::max(16 - bits.y, 0), std::max(16 - bits.z, 0),
2592 std::max(16 - bits.w, 0));
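// E.g. an 8-bit UNORM component (scale 0xFF, shift 16 - 8 = 8) yields
// 0xFF00 as the internal fixed-point representation of 1.0.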
2593
2594 return sw::float4(static_cast<uint16_t>(scale.x) << shift.x,
2595 static_cast<uint16_t>(scale.y) << shift.y,
2596 static_cast<uint16_t>(scale.z) << shift.z,
2597 static_cast<uint16_t>(scale.w) << shift.w);
2598 }
2599
2600 int SamplerCore::getGatherComponent() const
2601 {
2602 VkComponentSwizzle swizzle = gatherSwizzle();
2603
2604 switch(swizzle)
2605 {
2606 default: UNSUPPORTED("VkComponentSwizzle %d", (int)swizzle); return 0;
2607 case VK_COMPONENT_SWIZZLE_R:
2608 case VK_COMPONENT_SWIZZLE_G:
2609 case VK_COMPONENT_SWIZZLE_B:
2610 case VK_COMPONENT_SWIZZLE_A:
2611 // Normalize all components using the gather component scale.
2612 return swizzle - VK_COMPONENT_SWIZZLE_R;
2613 case VK_COMPONENT_SWIZZLE_ZERO:
2614 case VK_COMPONENT_SWIZZLE_ONE:
2615 // These cases are handled later.
2616 return 0;
2617 }
2618
2619 return 0;
2620 }
2621
2622 } // namespace sw
2623