• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "SamplerCore.hpp"
16 
17 #include "Constants.hpp"
18 #include "Debug.hpp"
19 
20 namespace
21 {
applySwizzle(sw::SwizzleType swizzle,sw::Short4 & s,const sw::Vector4s & c)22 	void applySwizzle(sw::SwizzleType swizzle, sw::Short4& s, const sw::Vector4s& c)
23 	{
24 		switch(swizzle)
25 		{
26 		case sw::SWIZZLE_RED:	s = c.x; break;
27 		case sw::SWIZZLE_GREEN: s = c.y; break;
28 		case sw::SWIZZLE_BLUE:  s = c.z; break;
29 		case sw::SWIZZLE_ALPHA: s = c.w; break;
30 		case sw::SWIZZLE_ZERO:  s = sw::Short4(0x0000); break;
31 		case sw::SWIZZLE_ONE:   s = sw::Short4(0x1000); break;
32 		default: ASSERT(false);
33 		}
34 	}
35 
applySwizzle(sw::SwizzleType swizzle,sw::Float4 & f,const sw::Vector4f & c)36 	void applySwizzle(sw::SwizzleType swizzle, sw::Float4& f, const sw::Vector4f& c)
37 	{
38 		switch(swizzle)
39 		{
40 		case sw::SWIZZLE_RED:	f = c.x; break;
41 		case sw::SWIZZLE_GREEN: f = c.y; break;
42 		case sw::SWIZZLE_BLUE:  f = c.z; break;
43 		case sw::SWIZZLE_ALPHA: f = c.w; break;
44 		case sw::SWIZZLE_ZERO:  f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
45 		case sw::SWIZZLE_ONE:   f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); break;
46 		default: ASSERT(false);
47 		}
48 	}
49 }
50 
51 namespace sw
52 {
53 	extern bool colorsDefaultToZero;
54 
SamplerCore(Pointer<Byte> & constants,const Sampler::State & state)55 	SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler::State &state) : constants(constants), state(state)
56 	{
57 	}
58 
sampleTexture(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & dsx,Vector4f & dsy)59 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy)
60 	{
61 		sampleTexture(texture, c, u, v, w, q, dsx, dsy, dsx, Implicit, true);
62 	}
63 
sampleTexture(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function,bool fixed12)64 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12)
65 	{
66 		#if PERF_PROFILE
67 			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
68 
69 			if(state.compressedFormat)
70 			{
71 				AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
72 			}
73 		#endif
74 
75 		Float4 uuuu = u;
76 		Float4 vvvv = v;
77 		Float4 wwww = w;
78 
79 		if(state.textureType == TEXTURE_NULL)
80 		{
81 			c.x = Short4(0x0000);
82 			c.y = Short4(0x0000);
83 			c.z = Short4(0x0000);
84 
85 			if(fixed12)   // FIXME: Convert to fixed12 at higher level, when required
86 			{
87 				c.w = Short4(0x1000);
88 			}
89 			else
90 			{
91 				c.w = Short4(0xFFFFu);   // FIXME
92 			}
93 		}
94 		else
95 		{
96 			Int face[4];
97 			Float4 lodX;
98 			Float4 lodY;
99 			Float4 lodZ;
100 
101 			if(state.textureType == TEXTURE_CUBE)
102 			{
103 				cubeFace(face, uuuu, vvvv, lodX, lodY, lodZ, u, v, w);
104 			}
105 
106 			Float lod;
107 			Float anisotropy;
108 			Float4 uDelta;
109 			Float4 vDelta;
110 			Float lodBias = (function == Fetch) ? Float4(As<Int4>(q)).x : q.x;
111 
112 			if(state.textureType != TEXTURE_3D)
113 			{
114 				if(state.textureType != TEXTURE_CUBE)
115 				{
116 					computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodBias, dsx, dsy, function);
117 				}
118 				else
119 				{
120 					computeLodCube(texture, lod, lodX, lodY, lodZ, lodBias, dsx, dsy, function);
121 				}
122 			}
123 			else
124 			{
125 				computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function);
126 			}
127 
128 			if(!hasFloatTexture())
129 			{
130 				sampleFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
131 			}
132 			else
133 			{
134 				Vector4f cf;
135 
136 				sampleFloatFilter(texture, cf, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
137 
138 				convertFixed12(c, cf);
139 			}
140 
141 			if(fixed12 && !hasFloatTexture())
142 			{
143 				if(has16bitTextureFormat())
144 				{
145 					switch(state.textureFormat)
146 					{
147 					case FORMAT_R5G6B5:
148 						if(state.sRGB)
149 						{
150 							sRGBtoLinear16_5_12(c.x);
151 							sRGBtoLinear16_6_12(c.y);
152 							sRGBtoLinear16_5_12(c.z);
153 						}
154 						else
155 						{
156 							c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800));
157 							c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00));
158 							c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800));
159 						}
160 						break;
161 					default:
162 						ASSERT(false);
163 					}
164 				}
165 				else
166 				{
167 					for(int component = 0; component < textureComponentCount(); component++)
168 					{
169 						if(state.sRGB && isRGBComponent(component))
170 						{
171 							sRGBtoLinear16_8_12(c[component]);   // FIXME: Perform linearization at surface level for read-only textures
172 						}
173 						else
174 						{
175 							if(hasUnsignedTextureComponent(component))
176 							{
177 								c[component] = As<UShort4>(c[component]) >> 4;
178 							}
179 							else
180 							{
181 								c[component] = c[component] >> 3;
182 							}
183 						}
184 					}
185 				}
186 			}
187 
188 			if(fixed12 && state.textureFilter != FILTER_GATHER)
189 			{
190 				int componentCount = textureComponentCount();
191 				short defaultColorValue = colorsDefaultToZero ? 0x0000 : 0x1000;
192 
193 				switch(state.textureFormat)
194 				{
195 				case FORMAT_R8I_SNORM:
196 				case FORMAT_G8R8I_SNORM:
197 				case FORMAT_X8B8G8R8I_SNORM:
198 				case FORMAT_A8B8G8R8I_SNORM:
199 				case FORMAT_R8:
200 				case FORMAT_R5G6B5:
201 				case FORMAT_G8R8:
202 				case FORMAT_R8I:
203 				case FORMAT_R8UI:
204 				case FORMAT_G8R8I:
205 				case FORMAT_G8R8UI:
206 				case FORMAT_X8B8G8R8I:
207 				case FORMAT_X8B8G8R8UI:
208 				case FORMAT_A8B8G8R8I:
209 				case FORMAT_A8B8G8R8UI:
210 				case FORMAT_R16I:
211 				case FORMAT_R16UI:
212 				case FORMAT_G16R16:
213 				case FORMAT_G16R16I:
214 				case FORMAT_G16R16UI:
215 				case FORMAT_X16B16G16R16I:
216 				case FORMAT_X16B16G16R16UI:
217 				case FORMAT_A16B16G16R16:
218 				case FORMAT_A16B16G16R16I:
219 				case FORMAT_A16B16G16R16UI:
220 				case FORMAT_R32I:
221 				case FORMAT_R32UI:
222 				case FORMAT_G32R32I:
223 				case FORMAT_G32R32UI:
224 				case FORMAT_X32B32G32R32I:
225 				case FORMAT_X32B32G32R32UI:
226 				case FORMAT_A32B32G32R32I:
227 				case FORMAT_A32B32G32R32UI:
228 				case FORMAT_X8R8G8B8:
229 				case FORMAT_X8B8G8R8:
230 				case FORMAT_A8R8G8B8:
231 				case FORMAT_A8B8G8R8:
232 				case FORMAT_SRGB8_X8:
233 				case FORMAT_SRGB8_A8:
234 				case FORMAT_V8U8:
235 				case FORMAT_Q8W8V8U8:
236 				case FORMAT_X8L8V8U8:
237 				case FORMAT_V16U16:
238 				case FORMAT_A16W16V16U16:
239 				case FORMAT_Q16W16V16U16:
240 				case FORMAT_YV12_BT601:
241 				case FORMAT_YV12_BT709:
242 				case FORMAT_YV12_JFIF:
243 					if(componentCount < 2) c.y = Short4(defaultColorValue);
244 					if(componentCount < 3) c.z = Short4(defaultColorValue);
245 					if(componentCount < 4) c.w = Short4(0x1000);
246 					break;
247 				case FORMAT_A8:
248 					c.w = c.x;
249 					c.x = Short4(0x0000);
250 					c.y = Short4(0x0000);
251 					c.z = Short4(0x0000);
252 					break;
253 				case FORMAT_L8:
254 				case FORMAT_L16:
255 					c.y = c.x;
256 					c.z = c.x;
257 					c.w = Short4(0x1000);
258 					break;
259 				case FORMAT_A8L8:
260 					c.w = c.y;
261 					c.y = c.x;
262 					c.z = c.x;
263 					break;
264 				case FORMAT_R32F:
265 					c.y = Short4(defaultColorValue);
266 				case FORMAT_G32R32F:
267 					c.z = Short4(defaultColorValue);
268 				case FORMAT_X32B32G32R32F:
269 					c.w = Short4(0x1000);
270 				case FORMAT_A32B32G32R32F:
271 					break;
272 				case FORMAT_D32F:
273 				case FORMAT_D32F_LOCKABLE:
274 				case FORMAT_D32FS8_TEXTURE:
275 				case FORMAT_D32FS8_SHADOW:
276 					c.y = c.x;
277 					c.z = c.x;
278 					c.w = c.x;
279 					break;
280 				default:
281 					ASSERT(false);
282 				}
283 			}
284 		}
285 
286 		if(fixed12 &&
287 		   ((state.swizzleR != SWIZZLE_RED) ||
288 		    (state.swizzleG != SWIZZLE_GREEN) ||
289 		    (state.swizzleB != SWIZZLE_BLUE) ||
290 		    (state.swizzleA != SWIZZLE_ALPHA)))
291 		{
292 			const Vector4s col(c);
293 			applySwizzle(state.swizzleR, c.x, col);
294 			applySwizzle(state.swizzleG, c.y, col);
295 			applySwizzle(state.swizzleB, c.z, col);
296 			applySwizzle(state.swizzleA, c.w, col);
297 		}
298 	}
299 
sampleTexture(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Float4 & q,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)300 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
301 	{
302 		#if PERF_PROFILE
303 			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
304 
305 			if(state.compressedFormat)
306 			{
307 				AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
308 			}
309 		#endif
310 
311 		if(state.textureType == TEXTURE_NULL)
312 		{
313 			c.x = Float4(0.0f);
314 			c.y = Float4(0.0f);
315 			c.z = Float4(0.0f);
316 			c.w = Float4(1.0f);
317 		}
318 		else
319 		{
320 			// FIXME: YUV and sRGB are not supported by the floating point path
321 			bool forceFloatFiltering = state.highPrecisionFiltering && !state.sRGB && !hasYuvFormat() && (state.textureFilter != FILTER_POINT);
322 			if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering)   // FIXME: Mostly identical to integer sampling
323 			{
324 				Float4 uuuu = u;
325 				Float4 vvvv = v;
326 				Float4 wwww = w;
327 
328 				Int face[4];
329 				Float4 lodX;
330 				Float4 lodY;
331 				Float4 lodZ;
332 
333 				if(state.textureType == TEXTURE_CUBE)
334 				{
335 					cubeFace(face, uuuu, vvvv, lodX, lodY, lodZ, u, v, w);
336 				}
337 
338 				Float lod;
339 				Float anisotropy;
340 				Float4 uDelta;
341 				Float4 vDelta;
342 				Float lodBias = (function == Fetch) ? Float4(As<Int4>(q)).x : q.x;
343 
344 				if(state.textureType != TEXTURE_3D)
345 				{
346 					if(state.textureType != TEXTURE_CUBE)
347 					{
348 						computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodBias, dsx, dsy, function);
349 					}
350 					else
351 					{
352 						computeLodCube(texture, lod, lodX, lodY, lodZ, lodBias, dsx, dsy, function);
353 					}
354 				}
355 				else
356 				{
357 					computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function);
358 				}
359 
360 				sampleFloatFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
361 
362 				if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture())
363 				{
364 					if(has16bitTextureFormat())
365 					{
366 						switch(state.textureFormat)
367 						{
368 						case FORMAT_R5G6B5:
369 							c.x *= Float4(1.0f / 0xF800);
370 							c.y *= Float4(1.0f / 0xFC00);
371 							c.z *= Float4(1.0f / 0xF800);
372 							break;
373 						default:
374 							ASSERT(false);
375 						}
376 					}
377 					else
378 					{
379 						for(int component = 0; component < textureComponentCount(); component++)
380 						{
381 							c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
382 						}
383 					}
384 				}
385 			}
386 			else
387 			{
388 				Vector4s cs;
389 
390 				sampleTexture(texture, cs, u, v, w, q, dsx, dsy, offset, function, false);
391 
392 				if(has16bitTextureFormat())
393 				{
394 					switch(state.textureFormat)
395 					{
396 					case FORMAT_R5G6B5:
397 						if(state.sRGB)
398 						{
399 							sRGBtoLinear16_5_12(cs.x);
400 							sRGBtoLinear16_6_12(cs.y);
401 							sRGBtoLinear16_5_12(cs.z);
402 
403 							convertSigned12(c.x, cs.x);
404 							convertSigned12(c.y, cs.y);
405 							convertSigned12(c.z, cs.z);
406 						}
407 						else
408 						{
409 							c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
410 							c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
411 							c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
412 						}
413 						break;
414 					default:
415 						ASSERT(false);
416 					}
417 				}
418 				else
419 				{
420 					for(int component = 0; component < textureComponentCount(); component++)
421 					{
422 						// Normalized integer formats
423 						if(state.sRGB && isRGBComponent(component))
424 						{
425 							sRGBtoLinear16_8_12(cs[component]);   // FIXME: Perform linearization at surface level for read-only textures
426 							convertSigned12(c[component], cs[component]);
427 						}
428 						else
429 						{
430 							if(hasUnsignedTextureComponent(component))
431 							{
432 								convertUnsigned16(c[component], cs[component]);
433 							}
434 							else
435 							{
436 								convertSigned15(c[component], cs[component]);
437 							}
438 						}
439 					}
440 				}
441 			}
442 
443 			int componentCount = textureComponentCount();
444 			float defaultColorValue = colorsDefaultToZero ? 0.0f : 1.0f;
445 
446 			if(state.textureFilter != FILTER_GATHER)
447 			{
448 				switch(state.textureFormat)
449 				{
450 				case FORMAT_R8I:
451 				case FORMAT_R8UI:
452 				case FORMAT_R16I:
453 				case FORMAT_R16UI:
454 				case FORMAT_R32I:
455 				case FORMAT_R32UI:
456 					c.y = As<Float4>(UInt4(0));
457 				case FORMAT_G8R8I:
458 				case FORMAT_G8R8UI:
459 				case FORMAT_G16R16I:
460 				case FORMAT_G16R16UI:
461 				case FORMAT_G32R32I:
462 				case FORMAT_G32R32UI:
463 					c.z = As<Float4>(UInt4(0));
464 				case FORMAT_X8B8G8R8I:
465 				case FORMAT_X8B8G8R8UI:
466 				case FORMAT_X16B16G16R16I:
467 				case FORMAT_X16B16G16R16UI:
468 				case FORMAT_X32B32G32R32I:
469 				case FORMAT_X32B32G32R32UI:
470 					c.w = As<Float4>(UInt4(1));
471 				case FORMAT_A8B8G8R8I:
472 				case FORMAT_A8B8G8R8UI:
473 				case FORMAT_A16B16G16R16I:
474 				case FORMAT_A16B16G16R16UI:
475 				case FORMAT_A32B32G32R32I:
476 				case FORMAT_A32B32G32R32UI:
477 					break;
478 				case FORMAT_R8I_SNORM:
479 				case FORMAT_G8R8I_SNORM:
480 				case FORMAT_X8B8G8R8I_SNORM:
481 				case FORMAT_A8B8G8R8I_SNORM:
482 				case FORMAT_R8:
483 				case FORMAT_R5G6B5:
484 				case FORMAT_G8R8:
485 				case FORMAT_G16R16:
486 				case FORMAT_A16B16G16R16:
487 				case FORMAT_X8R8G8B8:
488 				case FORMAT_X8B8G8R8:
489 				case FORMAT_A8R8G8B8:
490 				case FORMAT_A8B8G8R8:
491 				case FORMAT_SRGB8_X8:
492 				case FORMAT_SRGB8_A8:
493 				case FORMAT_V8U8:
494 				case FORMAT_Q8W8V8U8:
495 				case FORMAT_X8L8V8U8:
496 				case FORMAT_V16U16:
497 				case FORMAT_A16W16V16U16:
498 				case FORMAT_Q16W16V16U16:
499 				case FORMAT_YV12_BT601:
500 				case FORMAT_YV12_BT709:
501 				case FORMAT_YV12_JFIF:
502 					if(componentCount < 2) c.y = Float4(defaultColorValue);
503 					if(componentCount < 3) c.z = Float4(defaultColorValue);
504 					if(componentCount < 4) c.w = Float4(1.0f);
505 					break;
506 				case FORMAT_A8:
507 					c.w = c.x;
508 					c.x = Float4(0.0f);
509 					c.y = Float4(0.0f);
510 					c.z = Float4(0.0f);
511 					break;
512 				case FORMAT_L8:
513 				case FORMAT_L16:
514 					c.y = c.x;
515 					c.z = c.x;
516 					c.w = Float4(1.0f);
517 					break;
518 				case FORMAT_A8L8:
519 					c.w = c.y;
520 					c.y = c.x;
521 					c.z = c.x;
522 					break;
523 				case FORMAT_R32F:
524 					c.y = Float4(defaultColorValue);
525 				case FORMAT_G32R32F:
526 					c.z = Float4(defaultColorValue);
527 				case FORMAT_X32B32G32R32F:
528 					c.w = Float4(1.0f);
529 				case FORMAT_A32B32G32R32F:
530 					break;
531 				case FORMAT_D32F:
532 				case FORMAT_D32F_LOCKABLE:
533 				case FORMAT_D32FS8_TEXTURE:
534 				case FORMAT_D32FS8_SHADOW:
535 					c.y = c.x;
536 					c.z = c.x;
537 					c.w = c.x;
538 					break;
539 				default:
540 					ASSERT(false);
541 				}
542 			}
543 		}
544 
545 		if((state.swizzleR != SWIZZLE_RED) ||
546 		   (state.swizzleG != SWIZZLE_GREEN) ||
547 		   (state.swizzleB != SWIZZLE_BLUE) ||
548 		   (state.swizzleA != SWIZZLE_ALPHA))
549 		{
550 			const Vector4f col(c);
551 			applySwizzle(state.swizzleR, c.x, col);
552 			applySwizzle(state.swizzleG, c.y, col);
553 			applySwizzle(state.swizzleB, c.z, col);
554 			applySwizzle(state.swizzleA, c.w, col);
555 		}
556 	}
557 
textureSize(Pointer<Byte> & texture,Vector4f & size,Float4 & lod)558 	void SamplerCore::textureSize(Pointer<Byte> &texture, Vector4f &size, Float4 &lod)
559 	{
560 		for(int i = 0; i < 4; ++i)
561 		{
562 			Int baseLevel = *Pointer<Int>(texture + OFFSET(Texture, baseLevel));
563 			Pointer<Byte> mipmap = texture + OFFSET(Texture, mipmap) + (As<Int>(Extract(lod, i)) + baseLevel) * sizeof(Mipmap);
564 			size.x = Insert(size.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);
565 			size.y = Insert(size.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
566 			size.z = Insert(size.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
567 		}
568 	}
569 
border(Short4 & mask,Float4 & coordinates)570 	void SamplerCore::border(Short4 &mask, Float4 &coordinates)
571 	{
572 		Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
573 		mask = As<Short4>(Int2(As<Int4>(Pack(border, border))));
574 	}
575 
border(Int4 & mask,Float4 & coordinates)576 	void SamplerCore::border(Int4 &mask, Float4 &coordinates)
577 	{
578 		mask = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
579 	}
580 
offsetSample(Short4 & uvw,Pointer<Byte> & mipmap,int halfOffset,bool wrap,int count,Float & lod)581 	Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod)
582 	{
583 		Short4 offset = *Pointer<Short4>(mipmap + halfOffset);
584 
585 		if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
586 		{
587 			offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f)));
588 		}
589 		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
590 		{
591 			offset &= Short4(CmpLE(Float4(lod), Float4(0.0f)));
592 		}
593 
594 		if(wrap)
595 		{
596 			switch(count)
597 			{
598 			case -1: return uvw - offset;
599 			case  0: return uvw;
600 			case +1: return uvw + offset;
601 			case  2: return uvw + offset + offset;
602 			}
603 		}
604 		else   // Clamp or mirror
605 		{
606 			switch(count)
607 			{
608 			case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
609 			case  0: return uvw;
610 			case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
611 			case  2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
612 			}
613 		}
614 
615 		return uvw;
616 	}
617 
sampleFilter(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],SamplerFunction function)618 	void SamplerCore::sampleFilter(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
619 	{
620 		sampleAniso(texture, c, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
621 
622 		if(function == Fetch)
623 		{
624 			return;
625 		}
626 
627 		if(state.mipmapFilter > MIPMAP_POINT)
628 		{
629 			Vector4s cc;
630 
631 			sampleAniso(texture, cc, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
632 
633 			lod *= Float(1 << 16);
634 
635 			UShort4 utri = UShort4(Float4(lod));   // FIXME: Optimize
636 			Short4 stri = utri >> 1;   // FIXME: Optimize
637 
638 			if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri);
639 			if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri);
640 			if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri);
641 			if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri);
642 
643 			utri = ~utri;
644 			stri = Short4(0x7FFF) - stri;
645 
646 			if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri);
647 			if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri);
648 			if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri);
649 			if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri);
650 
651 			c.x += cc.x;
652 			c.y += cc.y;
653 			c.z += cc.z;
654 			c.w += cc.w;
655 
656 			if(!hasUnsignedTextureComponent(0)) c.x += c.x;
657 			if(!hasUnsignedTextureComponent(1)) c.y += c.y;
658 			if(!hasUnsignedTextureComponent(2)) c.z += c.z;
659 			if(!hasUnsignedTextureComponent(3)) c.w += c.w;
660 		}
661 
662 		Short4 borderMask;
663 
664 		if(state.addressingModeU == ADDRESSING_BORDER)
665 		{
666 			Short4 u0;
667 
668 			border(u0, u);
669 
670 			borderMask = u0;
671 		}
672 
673 		if(state.addressingModeV == ADDRESSING_BORDER)
674 		{
675 			Short4 v0;
676 
677 			border(v0, v);
678 
679 			if(state.addressingModeU == ADDRESSING_BORDER)
680 			{
681 				borderMask &= v0;
682 			}
683 			else
684 			{
685 				borderMask = v0;
686 			}
687 		}
688 
689 		if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
690 		{
691 			Short4 s0;
692 
693 			border(s0, w);
694 
695 			if(state.addressingModeU == ADDRESSING_BORDER ||
696 			   state.addressingModeV == ADDRESSING_BORDER)
697 			{
698 				borderMask &= s0;
699 			}
700 			else
701 			{
702 				borderMask = s0;
703 			}
704 		}
705 
706 		if(state.addressingModeU == ADDRESSING_BORDER ||
707 		   state.addressingModeV == ADDRESSING_BORDER ||
708 		   (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
709 		{
710 			Short4 b;
711 
712 			c.x = (borderMask & c.x) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1)));
713 			c.y = (borderMask & c.y) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1)));
714 			c.z = (borderMask & c.z) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1)));
715 			c.w = (borderMask & c.w) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 0 : 1)));
716 		}
717 	}
718 
sampleAniso(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],bool secondLOD,SamplerFunction function)719 	void SamplerCore::sampleAniso(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
720 	{
721 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
722 		{
723 			sampleQuad(texture, c, u, v, w, offset, lod, face, secondLOD, function);
724 		}
725 		else
726 		{
727 			Int a = RoundInt(anisotropy);
728 
729 			Vector4s cSum;
730 
731 			cSum.x = Short4(0);
732 			cSum.y = Short4(0);
733 			cSum.z = Short4(0);
734 			cSum.w = Short4(0);
735 
736 			Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
737 			Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
738 			UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a);
739 			Short4 sw = Short4(cw >> 1);
740 
741 			Float4 du = uDelta;
742 			Float4 dv = vDelta;
743 
744 			Float4 u0 = u + B * du;
745 			Float4 v0 = v + B * dv;
746 
747 			du *= A;
748 			dv *= A;
749 
750 			Int i = 0;
751 
752 			Do
753 			{
754 				sampleQuad(texture, c, u0, v0, w, offset, lod, face, secondLOD, function);
755 
756 				u0 += du;
757 				v0 += dv;
758 
759 				if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw);
760 				if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw);
761 				if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw);
762 				if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw);
763 
764 				i++;
765 			}
766 			Until(i >= a)
767 
768 			if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x);
769 			if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y);
770 			if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
771 			if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
772 		}
773 	}
774 
sampleQuad(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)775 	void SamplerCore::sampleQuad(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
776 	{
777 		if(state.textureType != TEXTURE_3D)
778 		{
779 			sampleQuad2D(texture, c, u, v, w, offset, lod, face, secondLOD, function);
780 		}
781 		else
782 		{
783 			sample3D(texture, c, u, v, w, offset, lod, secondLOD, function);
784 		}
785 	}
786 
sampleQuad2D(Pointer<Byte> & texture,Vector4s & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)787 	void SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
788 	{
789 		int componentCount = textureComponentCount();
790 		bool gather = state.textureFilter == FILTER_GATHER;
791 
792 		Pointer<Byte> mipmap;
793 		Pointer<Byte> buffer[4];
794 
795 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
796 
797 		bool texelFetch = (function == Fetch);
798 
799 		Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
800 		Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
801 		Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
802 
803 		if(state.textureFilter == FILTER_POINT || texelFetch)
804 		{
805 			c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
806 		}
807 		else
808 		{
809 			Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
810 			Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
811 			Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
812 			Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
813 
814 			Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function);
815 			Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function);
816 			Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function);
817 			Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function);
818 
819 			if(!gather)   // Blend
820 			{
821 				// Fractions
822 				UShort4 f0u = As<UShort4>(uuuu0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
823 				UShort4 f0v = As<UShort4>(vvvv0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
824 
825 				UShort4 f1u = ~f0u;
826 				UShort4 f1v = ~f0v;
827 
828 				UShort4 f0u0v = MulHigh(f0u, f0v);
829 				UShort4 f1u0v = MulHigh(f1u, f0v);
830 				UShort4 f0u1v = MulHigh(f0u, f1v);
831 				UShort4 f1u1v = MulHigh(f1u, f1v);
832 
833 				// Signed fractions
834 				Short4 f1u1vs;
835 				Short4 f0u1vs;
836 				Short4 f1u0vs;
837 				Short4 f0u0vs;
838 
839 				if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
840 				{
841 					f1u1vs = f1u1v >> 1;
842 					f0u1vs = f0u1v >> 1;
843 					f1u0vs = f1u0v >> 1;
844 					f0u0vs = f0u0v >> 1;
845 				}
846 
847 				// Bilinear interpolation
848 				if(componentCount >= 1)
849 				{
850 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
851 					{
852 						c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u);
853 						c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u);
854 						c.x  = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v);
855 					}
856 					else
857 					{
858 						if(hasUnsignedTextureComponent(0))
859 						{
860 							c0.x = MulHigh(As<UShort4>(c0.x), f1u1v);
861 							c1.x = MulHigh(As<UShort4>(c1.x), f0u1v);
862 							c2.x = MulHigh(As<UShort4>(c2.x), f1u0v);
863 							c3.x = MulHigh(As<UShort4>(c3.x), f0u0v);
864 						}
865 						else
866 						{
867 							c0.x = MulHigh(c0.x, f1u1vs);
868 							c1.x = MulHigh(c1.x, f0u1vs);
869 							c2.x = MulHigh(c2.x, f1u0vs);
870 							c3.x = MulHigh(c3.x, f0u0vs);
871 						}
872 
873 						c.x = (c0.x + c1.x) + (c2.x + c3.x);
874 						if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x);   // Correct for signed fractions
875 					}
876 				}
877 
878 				if(componentCount >= 2)
879 				{
880 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
881 					{
882 						c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u);
883 						c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u);
884 						c.y  = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v);
885 					}
886 					else
887 					{
888 						if(hasUnsignedTextureComponent(1))
889 						{
890 							c0.y = MulHigh(As<UShort4>(c0.y), f1u1v);
891 							c1.y = MulHigh(As<UShort4>(c1.y), f0u1v);
892 							c2.y = MulHigh(As<UShort4>(c2.y), f1u0v);
893 							c3.y = MulHigh(As<UShort4>(c3.y), f0u0v);
894 						}
895 						else
896 						{
897 							c0.y = MulHigh(c0.y, f1u1vs);
898 							c1.y = MulHigh(c1.y, f0u1vs);
899 							c2.y = MulHigh(c2.y, f1u0vs);
900 							c3.y = MulHigh(c3.y, f0u0vs);
901 						}
902 
903 						c.y = (c0.y + c1.y) + (c2.y + c3.y);
904 						if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y);   // Correct for signed fractions
905 					}
906 				}
907 
908 				if(componentCount >= 3)
909 				{
910 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
911 					{
912 						c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u);
913 						c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u);
914 						c.z  = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v);
915 					}
916 					else
917 					{
918 						if(hasUnsignedTextureComponent(2))
919 						{
920 							c0.z = MulHigh(As<UShort4>(c0.z), f1u1v);
921 							c1.z = MulHigh(As<UShort4>(c1.z), f0u1v);
922 							c2.z = MulHigh(As<UShort4>(c2.z), f1u0v);
923 							c3.z = MulHigh(As<UShort4>(c3.z), f0u0v);
924 						}
925 						else
926 						{
927 							c0.z = MulHigh(c0.z, f1u1vs);
928 							c1.z = MulHigh(c1.z, f0u1vs);
929 							c2.z = MulHigh(c2.z, f1u0vs);
930 							c3.z = MulHigh(c3.z, f0u0vs);
931 						}
932 
933 						c.z = (c0.z + c1.z) + (c2.z + c3.z);
934 						if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z);   // Correct for signed fractions
935 					}
936 				}
937 
938 				if(componentCount >= 4)
939 				{
940 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
941 					{
942 						c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u);
943 						c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u);
944 						c.w  = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v);
945 					}
946 					else
947 					{
948 						if(hasUnsignedTextureComponent(3))
949 						{
950 							c0.w = MulHigh(As<UShort4>(c0.w), f1u1v);
951 							c1.w = MulHigh(As<UShort4>(c1.w), f0u1v);
952 							c2.w = MulHigh(As<UShort4>(c2.w), f1u0v);
953 							c3.w = MulHigh(As<UShort4>(c3.w), f0u0v);
954 						}
955 						else
956 						{
957 							c0.w = MulHigh(c0.w, f1u1vs);
958 							c1.w = MulHigh(c1.w, f0u1vs);
959 							c2.w = MulHigh(c2.w, f1u0vs);
960 							c3.w = MulHigh(c3.w, f0u0vs);
961 						}
962 
963 						c.w = (c0.w + c1.w) + (c2.w + c3.w);
964 						if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w);   // Correct for signed fractions
965 					}
966 				}
967 			}
968 			else
969 			{
970 				c.x = c1.x;
971 				c.y = c2.x;
972 				c.z = c3.x;
973 				c.w = c0.x;
974 			}
975 		}
976 	}
977 
sample3D(Pointer<Byte> & texture,Vector4s & c_,Float4 & u_,Float4 & v_,Float4 & w_,Vector4f & offset,Float & lod,bool secondLOD,SamplerFunction function)978 	void SamplerCore::sample3D(Pointer<Byte> &texture, Vector4s &c_, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
979 	{
980 		int componentCount = textureComponentCount();
981 
982 		Pointer<Byte> mipmap;
983 		Pointer<Byte> buffer[4];
984 		Int face[4];
985 
986 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
987 
988 		bool texelFetch = (function == Fetch);
989 
990 		Short4 uuuu = texelFetch ? Short4(As<Int4>(u_)) : address(u_, state.addressingModeU, mipmap);
991 		Short4 vvvv = texelFetch ? Short4(As<Int4>(v_)) : address(v_, state.addressingModeV, mipmap);
992 		Short4 wwww = texelFetch ? Short4(As<Int4>(w_)) : address(w_, state.addressingModeW, mipmap);
993 
994 		if(state.textureFilter == FILTER_POINT || texelFetch)
995 		{
996 			c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
997 		}
998 		else
999 		{
1000 			Vector4s c[2][2][2];
1001 
1002 			Short4 u[2][2][2];
1003 			Short4 v[2][2][2];
1004 			Short4 s[2][2][2];
1005 
1006 			for(int i = 0; i < 2; i++)
1007 			{
1008 				for(int j = 0; j < 2; j++)
1009 				{
1010 					for(int k = 0; k < 2; k++)
1011 					{
1012 						u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
1013 						v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
1014 						s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
1015 					}
1016 				}
1017 			}
1018 
1019 			// Fractions
1020 			UShort4 f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
1021 			UShort4 f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
1022 			UShort4 f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth));
1023 
1024 			UShort4 f1u = ~f0u;
1025 			UShort4 f1v = ~f0v;
1026 			UShort4 f1s = ~f0s;
1027 
1028 			UShort4 f[2][2][2];
1029 			Short4 fs[2][2][2];
1030 
1031 			f[1][1][1] = MulHigh(f1u, f1v);
1032 			f[0][1][1] = MulHigh(f0u, f1v);
1033 			f[1][0][1] = MulHigh(f1u, f0v);
1034 			f[0][0][1] = MulHigh(f0u, f0v);
1035 			f[1][1][0] = MulHigh(f1u, f1v);
1036 			f[0][1][0] = MulHigh(f0u, f1v);
1037 			f[1][0][0] = MulHigh(f1u, f0v);
1038 			f[0][0][0] = MulHigh(f0u, f0v);
1039 
1040 			f[1][1][1] = MulHigh(f[1][1][1], f1s);
1041 			f[0][1][1] = MulHigh(f[0][1][1], f1s);
1042 			f[1][0][1] = MulHigh(f[1][0][1], f1s);
1043 			f[0][0][1] = MulHigh(f[0][0][1], f1s);
1044 			f[1][1][0] = MulHigh(f[1][1][0], f0s);
1045 			f[0][1][0] = MulHigh(f[0][1][0], f0s);
1046 			f[1][0][0] = MulHigh(f[1][0][0], f0s);
1047 			f[0][0][0] = MulHigh(f[0][0][0], f0s);
1048 
1049 			// Signed fractions
1050 			if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
1051 			{
1052 				fs[0][0][0] = f[0][0][0] >> 1;
1053 				fs[0][0][1] = f[0][0][1] >> 1;
1054 				fs[0][1][0] = f[0][1][0] >> 1;
1055 				fs[0][1][1] = f[0][1][1] >> 1;
1056 				fs[1][0][0] = f[1][0][0] >> 1;
1057 				fs[1][0][1] = f[1][0][1] >> 1;
1058 				fs[1][1][0] = f[1][1][0] >> 1;
1059 				fs[1][1][1] = f[1][1][1] >> 1;
1060 			}
1061 
1062 			for(int i = 0; i < 2; i++)
1063 			{
1064 				for(int j = 0; j < 2; j++)
1065 				{
1066 					for(int k = 0; k < 2; k++)
1067 					{
1068 						c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function);
1069 
1070 						if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
1071 						if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
1072 						if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); }
1073 						if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); }
1074 
1075 						if(i != 0 || j != 0 || k != 0)
1076 						{
1077 							if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x;
1078 							if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y;
1079 							if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z;
1080 							if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w;
1081 						}
1082 					}
1083 				}
1084 			}
1085 
1086 			if(componentCount >= 1) c_.x = c[0][0][0].x;
1087 			if(componentCount >= 2) c_.y = c[0][0][0].y;
1088 			if(componentCount >= 3) c_.z = c[0][0][0].z;
1089 			if(componentCount >= 4) c_.w = c[0][0][0].w;
1090 
1091 			// Correct for signed fractions
1092 			if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
1093 			if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
1094 			if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
1095 			if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
1096 		}
1097 	}
1098 
sampleFloatFilter(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],SamplerFunction function)1099 	void SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
1100 	{
1101 		sampleFloatAniso(texture, c, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
1102 
1103 		if(function == Fetch)
1104 		{
1105 			return;
1106 		}
1107 
1108 		if(state.mipmapFilter > MIPMAP_POINT)
1109 		{
1110 			Vector4f cc;
1111 
1112 			sampleFloatAniso(texture, cc, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
1113 
1114 			Float4 lod4 = Float4(Frac(lod));
1115 
1116 			c.x = (cc.x - c.x) * lod4 + c.x;
1117 			c.y = (cc.y - c.y) * lod4 + c.y;
1118 			c.z = (cc.z - c.z) * lod4 + c.z;
1119 			c.w = (cc.w - c.w) * lod4 + c.w;
1120 		}
1121 
1122 		Int4 borderMask;
1123 
1124 		if(state.addressingModeU == ADDRESSING_BORDER)
1125 		{
1126 			Int4 u0;
1127 
1128 			border(u0, u);
1129 
1130 			borderMask = u0;
1131 		}
1132 
1133 		if(state.addressingModeV == ADDRESSING_BORDER)
1134 		{
1135 			Int4 v0;
1136 
1137 			border(v0, v);
1138 
1139 			if(state.addressingModeU == ADDRESSING_BORDER)
1140 			{
1141 				borderMask &= v0;
1142 			}
1143 			else
1144 			{
1145 				borderMask = v0;
1146 			}
1147 		}
1148 
1149 		if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
1150 		{
1151 			Int4 s0;
1152 
1153 			border(s0, w);
1154 
1155 			if(state.addressingModeU == ADDRESSING_BORDER ||
1156 			   state.addressingModeV == ADDRESSING_BORDER)
1157 			{
1158 				borderMask &= s0;
1159 			}
1160 			else
1161 			{
1162 				borderMask = s0;
1163 			}
1164 		}
1165 
1166 		if(state.addressingModeU == ADDRESSING_BORDER ||
1167 		   state.addressingModeV == ADDRESSING_BORDER ||
1168 		   (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
1169 		{
1170 			Int4 b;
1171 
1172 			c.x = As<Float4>((borderMask & As<Int4>(c.x)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0]))));
1173 			c.y = As<Float4>((borderMask & As<Int4>(c.y)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1]))));
1174 			c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2]))));
1175 			c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3]))));
1176 		}
1177 	}
1178 
sampleFloatAniso(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Int face[4],bool secondLOD,SamplerFunction function)1179 	void SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
1180 	{
1181 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
1182 		{
1183 			sampleFloat(texture, c, u, v, w, offset, lod, face, secondLOD, function);
1184 		}
1185 		else
1186 		{
1187 			Int a = RoundInt(anisotropy);
1188 
1189 			Vector4f cSum;
1190 
1191 			cSum.x = Float4(0.0f);
1192 			cSum.y = Float4(0.0f);
1193 			cSum.z = Float4(0.0f);
1194 			cSum.w = Float4(0.0f);
1195 
1196 			Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
1197 			Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
1198 
1199 			Float4 du = uDelta;
1200 			Float4 dv = vDelta;
1201 
1202 			Float4 u0 = u + B * du;
1203 			Float4 v0 = v + B * dv;
1204 
1205 			du *= A;
1206 			dv *= A;
1207 
1208 			Int i = 0;
1209 
1210 			Do
1211 			{
1212 				sampleFloat(texture, c, u0, v0, w, offset, lod, face, secondLOD, function);
1213 
1214 				u0 += du;
1215 				v0 += dv;
1216 
1217 				cSum.x += c.x * A;
1218 				cSum.y += c.y * A;
1219 				cSum.z += c.z * A;
1220 				cSum.w += c.w * A;
1221 
1222 				i++;
1223 			}
1224 			Until(i >= a)
1225 
1226 			c.x = cSum.x;
1227 			c.y = cSum.y;
1228 			c.z = cSum.z;
1229 			c.w = cSum.w;
1230 		}
1231 	}
1232 
sampleFloat(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)1233 	void SamplerCore::sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
1234 	{
1235 		if(state.textureType != TEXTURE_3D)
1236 		{
1237 			sampleFloat2D(texture, c, u, v, w, offset, lod, face, secondLOD, function);
1238 		}
1239 		else
1240 		{
1241 			sampleFloat3D(texture, c, u, v, w, offset, lod, secondLOD, function);
1242 		}
1243 	}
1244 
sampleFloat2D(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,Int face[4],bool secondLOD,SamplerFunction function)1245 	void SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
1246 	{
1247 		int componentCount = textureComponentCount();
1248 		bool gather = state.textureFilter == FILTER_GATHER;
1249 
1250 		Pointer<Byte> mipmap;
1251 		Pointer<Byte> buffer[4];
1252 
1253 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
1254 
1255 		Int4 x0, x1, y0, y1, z0;
1256 		Float4 fu, fv;
1257 		Int4 filter = computeFilterOffset(lod);
1258 		address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
1259 		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
1260 		address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
1261 
1262 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
1263 		y0 *= pitchP;
1264 		if(hasThirdCoordinate())
1265 		{
1266 			Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
1267 			z0 *= sliceP;
1268 		}
1269 
1270 		if(state.textureFilter == FILTER_POINT || (function == Fetch))
1271 		{
1272 			c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
1273 		}
1274 		else
1275 		{
1276 			y1 *= pitchP;
1277 
1278 			Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
1279 			Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
1280 			Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
1281 			Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
1282 
1283 			if(!gather)   // Blend
1284 			{
1285 				if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
1286 				if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
1287 				if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
1288 				if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
1289 
1290 				if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
1291 				if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
1292 				if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
1293 				if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
1294 
1295 				if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x);
1296 				if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y);
1297 				if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z);
1298 				if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w);
1299 			}
1300 			else
1301 			{
1302 				c.x = c1.x;
1303 				c.y = c2.x;
1304 				c.z = c3.x;
1305 				c.w = c0.x;
1306 			}
1307 		}
1308 	}
1309 
sampleFloat3D(Pointer<Byte> & texture,Vector4f & c,Float4 & u,Float4 & v,Float4 & w,Vector4f & offset,Float & lod,bool secondLOD,SamplerFunction function)1310 	void SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
1311 	{
1312 		int componentCount = textureComponentCount();
1313 
1314 		Pointer<Byte> mipmap;
1315 		Pointer<Byte> buffer[4];
1316 		Int face[4];
1317 
1318 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
1319 
1320 		Int4 x0, x1, y0, y1, z0, z1;
1321 		Float4 fu, fv, fw;
1322 		Int4 filter = computeFilterOffset(lod);
1323 		address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
1324 		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
1325 		address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
1326 
1327 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
1328 		Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
1329 		y0 *= pitchP;
1330 		z0 *= sliceP;
1331 
1332 		if(state.textureFilter == FILTER_POINT || (function == Fetch))
1333 		{
1334 			c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
1335 		}
1336 		else
1337 		{
1338 			y1 *= pitchP;
1339 			z1 *= sliceP;
1340 
1341 			Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
1342 			Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
1343 			Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
1344 			Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
1345 			Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function);
1346 			Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function);
1347 			Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function);
1348 			Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function);
1349 
1350 			// Blend first slice
1351 			if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
1352 			if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
1353 			if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
1354 			if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
1355 
1356 			if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
1357 			if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
1358 			if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
1359 			if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
1360 
1361 			if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x);
1362 			if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y);
1363 			if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z);
1364 			if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w);
1365 
1366 			// Blend second slice
1367 			if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x);
1368 			if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y);
1369 			if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z);
1370 			if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w);
1371 
1372 			if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x);
1373 			if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y);
1374 			if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z);
1375 			if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w);
1376 
1377 			if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x);
1378 			if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y);
1379 			if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z);
1380 			if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w);
1381 
1382 			// Blend slices
1383 			if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x);
1384 			if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y);
1385 			if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z);
1386 			if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w);
1387 		}
1388 	}
1389 
log2sqrt(Float lod)1390 	Float SamplerCore::log2sqrt(Float lod)
1391 	{
1392 		// log2(sqrt(lod))                               // Equals 0.25 * log2(lod^2).
1393 		lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
1394 		lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
1395 		lod *= As<Float>(Int(0x33000000));               // Scale by 0.25 * 2^-23 (mantissa length).
1396 
1397 		return lod;
1398 	}
1399 
computeLod(Pointer<Byte> & texture,Float & lod,Float & anisotropy,Float4 & uDelta,Float4 & vDelta,Float4 & uuuu,Float4 & vvvv,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,SamplerFunction function)1400 	void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
1401 	{
1402 		if(function != Lod && function != Fetch)
1403 		{
1404 			Float4 duvdxy;
1405 
1406 			if(function != Grad)
1407 			{
1408 				duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
1409 			}
1410 			else
1411 			{
1412 				Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
1413 				Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
1414 
1415 				duvdxy = Float4(dudxy.xz, dvdxy.xz);
1416 			}
1417 
1418 			// Scale by texture dimensions and LOD
1419 			Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD));
1420 
1421 			Float4 dUV2dxy = dUVdxy * dUVdxy;
1422 			Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
1423 
1424 			lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
1425 
1426 			if(state.textureFilter == FILTER_ANISOTROPIC)
1427 			{
1428 				Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z));
1429 
1430 				Float4 dudx = duvdxy.xxxx;
1431 				Float4 dudy = duvdxy.yyyy;
1432 				Float4 dvdx = duvdxy.zzzz;
1433 				Float4 dvdy = duvdxy.wwww;
1434 
1435 				Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y));
1436 				uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask)));
1437 				vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));
1438 
1439 				anisotropy = lod * Rcp_pp(det);
1440 				anisotropy = Min(anisotropy, *Pointer<Float>(texture + OFFSET(Texture,maxAnisotropy)));
1441 
1442 				lod *= Rcp_pp(anisotropy * anisotropy);
1443 			}
1444 
1445 			lod = log2sqrt(lod);   // log2(sqrt(lod))
1446 
1447 			if(function == Bias)
1448 			{
1449 				lod += lodBias;
1450 			}
1451 		}
1452 		else
1453 		{
1454 			lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel)));
1455 		}
1456 
1457 		lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
1458 		lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
1459 	}
1460 
computeLodCube(Pointer<Byte> & texture,Float & lod,Float4 & u,Float4 & v,Float4 & s,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,SamplerFunction function)1461 	void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &s, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
1462 	{
1463 		if(function != Lod && function != Fetch)
1464 		{
1465 			if(function != Grad)
1466 			{
1467 				Float4 dudxy = u.ywyw - u;
1468 				Float4 dvdxy = v.ywyw - v;
1469 				Float4 dsdxy = s.ywyw - s;
1470 
1471 				// Scale by texture dimensions and LOD
1472 				dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
1473 				dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
1474 				dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
1475 
1476 				dudxy *= dudxy;
1477 				dvdxy *= dvdxy;
1478 				dsdxy *= dsdxy;
1479 
1480 				dudxy += dvdxy;
1481 				dudxy += dsdxy;
1482 
1483 				lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
1484 			}
1485 			else
1486 			{
1487 				Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
1488 				Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
1489 
1490 				Float4 duvdxy = Float4(dudxy.xz, dvdxy.xz);
1491 
1492 				// Scale by texture dimensions and LOD
1493 				Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
1494 
1495 				Float4 dUV2dxy = dUVdxy * dUVdxy;
1496 				Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
1497 
1498 				lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
1499 			}
1500 
1501 			lod = log2sqrt(lod);   // log2(sqrt(lod))
1502 
1503 			if(function == Bias)
1504 			{
1505 				lod += lodBias;
1506 			}
1507 		}
1508 		else
1509 		{
1510 			lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel)));
1511 		}
1512 
1513 		lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
1514 		lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
1515 	}
1516 
computeLod3D(Pointer<Byte> & texture,Float & lod,Float4 & uuuu,Float4 & vvvv,Float4 & wwww,const Float & lodBias,Vector4f & dsx,Vector4f & dsy,SamplerFunction function)1517 	void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
1518 	{
1519 		if(state.mipmapFilter == MIPMAP_NONE)
1520 		{
1521 		}
1522 		else   // Point and linear filter
1523 		{
1524 			if(function != Lod && function != Fetch)
1525 			{
1526 				Float4 dudxy;
1527 				Float4 dvdxy;
1528 				Float4 dsdxy;
1529 
1530 				if(function != Grad)
1531 				{
1532 					dudxy = uuuu.ywyw - uuuu;
1533 					dvdxy = vvvv.ywyw - vvvv;
1534 					dsdxy = wwww.ywyw - wwww;
1535 				}
1536 				else
1537 				{
1538 					dudxy = dsx.x;
1539 					dvdxy = dsx.y;
1540 					dsdxy = dsx.z;
1541 
1542 					dudxy = Float4(dudxy.xx, dsy.x.xx);
1543 					dvdxy = Float4(dvdxy.xx, dsy.y.xx);
1544 					dsdxy = Float4(dsdxy.xx, dsy.z.xx);
1545 
1546 					dudxy = Float4(dudxy.xz, dudxy.xz);
1547 					dvdxy = Float4(dvdxy.xz, dvdxy.xz);
1548 					dsdxy = Float4(dsdxy.xz, dsdxy.xz);
1549 				}
1550 
1551 				// Scale by texture dimensions and LOD
1552 				dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
1553 				dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD));
1554 				dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD));
1555 
1556 				dudxy *= dudxy;
1557 				dvdxy *= dvdxy;
1558 				dsdxy *= dsdxy;
1559 
1560 				dudxy += dvdxy;
1561 				dudxy += dsdxy;
1562 
1563 				lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
1564 
1565 				lod = log2sqrt(lod);   // log2(sqrt(lod))
1566 
1567 				if(function == Bias)
1568 				{
1569 					lod += lodBias;
1570 				}
1571 			}
1572 			else
1573 			{
1574 				lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel)));
1575 			}
1576 
1577 			lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
1578 			lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
1579 		}
1580 	}
1581 
cubeFace(Int face[4],Float4 & U,Float4 & V,Float4 & lodX,Float4 & lodY,Float4 & lodZ,Float4 & x,Float4 & y,Float4 & z)1582 	void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodX, Float4 &lodY, Float4 &lodZ, Float4 &x, Float4 &y, Float4 &z)
1583 	{
1584 		Int4 xn = CmpLT(x, Float4(0.0f));   // x < 0
1585 		Int4 yn = CmpLT(y, Float4(0.0f));   // y < 0
1586 		Int4 zn = CmpLT(z, Float4(0.0f));   // z < 0
1587 
1588 		Float4 absX = Abs(x);
1589 		Float4 absY = Abs(y);
1590 		Float4 absZ = Abs(z);
1591 
1592 		Int4 xy = CmpNLE(absX, absY);   // abs(x) > abs(y)
1593 		Int4 yz = CmpNLE(absY, absZ);   // abs(y) > abs(z)
1594 		Int4 zx = CmpNLE(absZ, absX);   // abs(z) > abs(x)
1595 		Int4 xMajor = xy & ~zx;   // abs(x) > abs(y) && abs(x) > abs(z)
1596 		Int4 yMajor = yz & ~xy;   // abs(y) > abs(z) && abs(y) > abs(x)
1597 		Int4 zMajor = zx & ~yz;   // abs(z) > abs(x) && abs(z) > abs(y)
1598 
1599 		// FACE_POSITIVE_X = 000b
1600 		// FACE_NEGATIVE_X = 001b
1601 		// FACE_POSITIVE_Y = 010b
1602 		// FACE_NEGATIVE_Y = 011b
1603 		// FACE_POSITIVE_Z = 100b
1604 		// FACE_NEGATIVE_Z = 101b
1605 
1606 		Int yAxis = SignMask(yMajor);
1607 		Int zAxis = SignMask(zMajor);
1608 
1609 		Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
1610 		Int negative = SignMask(n);
1611 
1612 		face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
1613 		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
1614 		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
1615 		face[1] = (face[0] >> 4)  & 0x7;
1616 		face[2] = (face[0] >> 8)  & 0x7;
1617 		face[3] = (face[0] >> 12) & 0x7;
1618 		face[0] &= 0x7;
1619 
1620 		Float4 M = Max(Max(absX, absY), absZ);
1621 
1622 		// U = xMajor ? (neg ^ -z) : (zMajor & neg) ^ x)
1623 		U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x))));
1624 
1625 		// V = !yMajor ? -y : (n ^ z)
1626 		V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z))));
1627 
1628 		M = reciprocal(M) * Float4(0.5f);
1629 		U = U * M + Float4(0.5f);
1630 		V = V * M + Float4(0.5f);
1631 
1632 		lodX = x * M;
1633 		lodY = y * M;
1634 		lodZ = z * M;
1635 	}
1636 
applyOffset(Short4 & uvw,Float4 & offset,const Int4 & whd,AddressingMode mode)1637 	Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode)
1638 	{
1639 		Int4 tmp = Int4(As<UShort4>(uvw));
1640 		tmp = tmp + As<Int4>(offset);
1641 
1642 		switch (mode)
1643 		{
1644 		case AddressingMode::ADDRESSING_WRAP:
1645 			tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd;
1646 			break;
1647 		case AddressingMode::ADDRESSING_CLAMP:
1648 		case AddressingMode::ADDRESSING_MIRROR:
1649 		case AddressingMode::ADDRESSING_MIRRORONCE:
1650 		case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
1651 			tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
1652 			break;
1653 		case ADDRESSING_TEXELFETCH:
1654 			break;
1655 		default:
1656 			ASSERT(false);
1657 		}
1658 
1659 		return As<Short4>(UShort4(tmp));
1660 	}
1661 
computeIndices(UInt index[4],Short4 uuuu,Short4 vvvv,Short4 wwww,Vector4f & offset,const Pointer<Byte> & mipmap,SamplerFunction function)1662 	void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function)
1663 	{
1664 		bool texelFetch = (function == Fetch);
1665 		bool hasOffset = (function.option == Offset);
1666 
1667 		if(!texelFetch)
1668 		{
1669 			uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width)));
1670 			vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height)));
1671 		}
1672 
1673 		if(hasOffset)
1674 		{
1675 			uuuu = applyOffset(uuuu, offset.x, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, width))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU);
1676 			vvvv = applyOffset(vvvv, offset.y, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, height))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV);
1677 		}
1678 
1679 		Short4 uuu2 = uuuu;
1680 		uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
1681 		uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
1682 		uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
1683 		uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
1684 
1685 		if(hasThirdCoordinate())
1686 		{
1687 			if(state.textureType != TEXTURE_2D_ARRAY)
1688 			{
1689 				if(!texelFetch)
1690 				{
1691 					wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)));
1692 				}
1693 				if(hasOffset)
1694 				{
1695 					wwww = applyOffset(wwww, offset.z, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeW);
1696 				}
1697 			}
1698 			Short4 www2 = wwww;
1699 			wwww = As<Short4>(UnpackLow(wwww, Short4(0x0000)));
1700 			www2 = As<Short4>(UnpackHigh(www2, Short4(0x0000)));
1701 			wwww = As<Short4>(MulAdd(wwww, *Pointer<Short4>(mipmap + OFFSET(Mipmap,sliceP))));
1702 			www2 = As<Short4>(MulAdd(www2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,sliceP))));
1703 			uuuu = As<Short4>(As<Int2>(uuuu) + As<Int2>(wwww));
1704 			uuu2 = As<Short4>(As<Int2>(uuu2) + As<Int2>(www2));
1705 		}
1706 
1707 		index[0] = Extract(As<Int2>(uuuu), 0);
1708 		index[1] = Extract(As<Int2>(uuuu), 1);
1709 		index[2] = Extract(As<Int2>(uuu2), 0);
1710 		index[3] = Extract(As<Int2>(uuu2), 1);
1711 
1712 		if(texelFetch)
1713 		{
1714 			Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP)));
1715 			if(hasThirdCoordinate())
1716 			{
1717 				size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)));
1718 			}
1719 			UInt min = 0;
1720 			UInt max = size - 1;
1721 
1722 			for(int i = 0; i < 4; i++)
1723 			{
1724 				index[i] = Min(Max(index[i], min), max);
1725 			}
1726 		}
1727 	}
1728 
computeIndices(UInt index[4],Int4 & uuuu,Int4 & vvvv,Int4 & wwww,const Pointer<Byte> & mipmap,SamplerFunction function)1729 	void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function)
1730 	{
1731 		UInt4 indices = uuuu + vvvv;
1732 
1733 		if(hasThirdCoordinate())
1734 		{
1735 			indices += As<UInt4>(wwww);
1736 		}
1737 
1738 		for(int i = 0; i < 4; i++)
1739 		{
1740 			index[i] = Extract(As<Int4>(indices), i);
1741 		}
1742 	}
1743 
sampleTexel(UInt index[4],Pointer<Byte> buffer[4])1744 	Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4])
1745 	{
1746 		Vector4s c;
1747 
1748 		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
1749 		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
1750 		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
1751 		int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
1752 
1753 		if(has16bitTextureFormat())
1754 		{
1755 			c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1756 			c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1757 			c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1758 			c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1759 
1760 			switch(state.textureFormat)
1761 			{
1762 			case FORMAT_R5G6B5:
1763 				c.z = (c.x & Short4(0x001Fu)) << 11;
1764 				c.y = (c.x & Short4(0x07E0u)) << 5;
1765 				c.x = (c.x & Short4(0xF800u));
1766 				break;
1767 			default:
1768 				ASSERT(false);
1769 			}
1770 		}
1771 		else if(has8bitTextureComponents())
1772 		{
1773 			switch(textureComponentCount())
1774 			{
1775 			case 4:
1776 				{
1777 					Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
1778 					Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
1779 					Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
1780 					Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
1781 					c.x = Unpack(c0, c1);
1782 					c.y = Unpack(c2, c3);
1783 
1784 					switch(state.textureFormat)
1785 					{
1786 					case FORMAT_A8R8G8B8:
1787 						c.z = As<Short4>(UnpackLow(c.x, c.y));
1788 						c.x = As<Short4>(UnpackHigh(c.x, c.y));
1789 						c.y = c.z;
1790 						c.w = c.x;
1791 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1792 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1793 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1794 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
1795 						break;
1796 					case FORMAT_A8B8G8R8:
1797 					case FORMAT_A8B8G8R8I:
1798 					case FORMAT_A8B8G8R8I_SNORM:
1799 					case FORMAT_Q8W8V8U8:
1800 					case FORMAT_SRGB8_A8:
1801 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
1802 						c.x = As<Short4>(UnpackLow(c.x, c.y));
1803 						c.y = c.x;
1804 						c.w = c.z;
1805 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1806 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1807 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1808 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
1809 						// Propagate sign bit
1810 						if(state.textureFormat == FORMAT_A8B8G8R8I)
1811 						{
1812 							c.x >>= 8;
1813 							c.y >>= 8;
1814 							c.z >>= 8;
1815 							c.w >>= 8;
1816 						}
1817 						break;
1818 					case FORMAT_A8B8G8R8UI:
1819 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
1820 						c.x = As<Short4>(UnpackLow(c.x, c.y));
1821 						c.y = c.x;
1822 						c.w = c.z;
1823 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
1824 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
1825 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
1826 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
1827 						break;
1828 					default:
1829 						ASSERT(false);
1830 					}
1831 				}
1832 				break;
1833 			case 3:
1834 				{
1835 					Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
1836 					Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
1837 					Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
1838 					Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
1839 					c.x = Unpack(c0, c1);
1840 					c.y = Unpack(c2, c3);
1841 
1842 					switch(state.textureFormat)
1843 					{
1844 					case FORMAT_X8R8G8B8:
1845 						c.z = As<Short4>(UnpackLow(c.x, c.y));
1846 						c.x = As<Short4>(UnpackHigh(c.x, c.y));
1847 						c.y = c.z;
1848 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1849 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1850 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1851 						break;
1852 					case FORMAT_X8B8G8R8I_SNORM:
1853 					case FORMAT_X8B8G8R8I:
1854 					case FORMAT_X8B8G8R8:
1855 					case FORMAT_X8L8V8U8:
1856 					case FORMAT_SRGB8_X8:
1857 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
1858 						c.x = As<Short4>(UnpackLow(c.x, c.y));
1859 						c.y = c.x;
1860 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
1861 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
1862 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
1863 						// Propagate sign bit
1864 						if(state.textureFormat == FORMAT_X8B8G8R8I)
1865 						{
1866 							c.x >>= 8;
1867 							c.y >>= 8;
1868 							c.z >>= 8;
1869 						}
1870 						break;
1871 					case FORMAT_X8B8G8R8UI:
1872 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
1873 						c.x = As<Short4>(UnpackLow(c.x, c.y));
1874 						c.y = c.x;
1875 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
1876 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
1877 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
1878 						break;
1879 					default:
1880 						ASSERT(false);
1881 					}
1882 				}
1883 				break;
1884 			case 2:
1885 				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1886 				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1887 				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1888 				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1889 
1890 				switch(state.textureFormat)
1891 				{
1892 				case FORMAT_G8R8:
1893 				case FORMAT_G8R8I_SNORM:
1894 				case FORMAT_V8U8:
1895 				case FORMAT_A8L8:
1896 					c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8);
1897 					c.x = (c.x & Short4(0x00FFu)) | (c.x << 8);
1898 					break;
1899 				case FORMAT_G8R8I:
1900 					c.y = c.x >> 8;
1901 					c.x = (c.x << 8) >> 8; // Propagate sign bit
1902 					break;
1903 				case FORMAT_G8R8UI:
1904 					c.y = As<Short4>(As<UShort4>(c.x) >> 8);
1905 					c.x &= Short4(0x00FFu);
1906 					break;
1907 				default:
1908 					ASSERT(false);
1909 				}
1910 				break;
1911 			case 1:
1912 				{
1913 					Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0]));
1914 					Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
1915 					Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
1916 					Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
1917 					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
1918 
1919 					switch(state.textureFormat)
1920 					{
1921 					case FORMAT_R8I:
1922 					case FORMAT_R8UI:
1923 						{
1924 							Int zero(0);
1925 							c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
1926 							// Propagate sign bit
1927 							if(state.textureFormat == FORMAT_R8I)
1928 							{
1929 								c.x = (c.x << 8) >> 8;
1930 							}
1931 						}
1932 						break;
1933 					default:
1934 						c.x = Unpack(As<Byte4>(c0));
1935 						break;
1936 					}
1937 				}
1938 				break;
1939 			default:
1940 				ASSERT(false);
1941 			}
1942 		}
1943 		else if(has16bitTextureComponents())
1944 		{
1945 			switch(textureComponentCount())
1946 			{
1947 			case 4:
1948 				c.x = Pointer<Short4>(buffer[f0])[index[0]];
1949 				c.y = Pointer<Short4>(buffer[f1])[index[1]];
1950 				c.z = Pointer<Short4>(buffer[f2])[index[2]];
1951 				c.w = Pointer<Short4>(buffer[f3])[index[3]];
1952 				transpose4x4(c.x, c.y, c.z, c.w);
1953 				break;
1954 			case 2:
1955 				c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
1956 				c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
1957 				c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
1958 				c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
1959 				c.y = c.x;
1960 				c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
1961 				c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
1962 				break;
1963 			case 1:
1964 				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
1965 				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
1966 				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
1967 				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
1968 				break;
1969 			default:
1970 				ASSERT(false);
1971 			}
1972 		}
1973 		else ASSERT(false);
1974 
1975 		return c;
1976 	}
1977 
sampleTexel(Short4 & uuuu,Short4 & vvvv,Short4 & wwww,Vector4f & offset,Pointer<Byte> & mipmap,Pointer<Byte> buffer[4],SamplerFunction function)1978 	Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
1979 	{
1980 		Vector4s c;
1981 
1982 		UInt index[4];
1983 		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
1984 
1985 		if(hasYuvFormat())
1986 		{
1987 			// Generic YPbPr to RGB transformation
1988 			// R = Y                               +           2 * (1 - Kr) * Pr
1989 			// G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
1990 			// B = Y +           2 * (1 - Kb) * Pb
1991 
1992 			float Kb = 0.114f;
1993 			float Kr = 0.299f;
1994 			int studioSwing = 1;
1995 
1996 			switch(state.textureFormat)
1997 			{
1998 			case FORMAT_YV12_BT601:
1999 				Kb = 0.114f;
2000 				Kr = 0.299f;
2001 				studioSwing = 1;
2002 				break;
2003 			case FORMAT_YV12_BT709:
2004 				Kb = 0.0722f;
2005 				Kr = 0.2126f;
2006 				studioSwing = 1;
2007 				break;
2008 			case FORMAT_YV12_JFIF:
2009 				Kb = 0.114f;
2010 				Kr = 0.299f;
2011 				studioSwing = 0;
2012 				break;
2013 			default:
2014 				ASSERT(false);
2015 			}
2016 
2017 			const float Kg = 1.0f - Kr - Kb;
2018 
2019 			const float Rr = 2 * (1 - Kr);
2020 			const float Gb = -2 * Kb * (1 - Kb) / Kg;
2021 			const float Gr = -2 * Kr * (1 - Kr) / Kg;
2022 			const float Bb = 2 * (1 - Kb);
2023 
2024 			// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
2025 			const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
2026 			const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
2027 			const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;
2028 
2029 			const float Rv = Vv *  Rr;
2030 			const float Gu = Uu *  Gb;
2031 			const float Gv = Vv *  Gr;
2032 			const float Bu = Uu *  Bb;
2033 
2034 			const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
2035 			const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
2036 			const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;
2037 
2038 			Int c0 = Int(buffer[0][index[0]]);
2039 			Int c1 = Int(buffer[0][index[1]]);
2040 			Int c2 = Int(buffer[0][index[2]]);
2041 			Int c3 = Int(buffer[0][index[3]]);
2042 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
2043 			UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
2044 
2045 			computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
2046 			c0 = Int(buffer[1][index[0]]);
2047 			c1 = Int(buffer[1][index[1]]);
2048 			c2 = Int(buffer[1][index[2]]);
2049 			c3 = Int(buffer[1][index[3]]);
2050 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
2051 			UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
2052 
2053 			c0 = Int(buffer[2][index[0]]);
2054 			c1 = Int(buffer[2][index[1]]);
2055 			c2 = Int(buffer[2][index[2]]);
2056 			c3 = Int(buffer[2][index[3]]);
2057 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
2058 			UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
2059 
2060 			const UShort4 yY = UShort4(iround(Yy * 0x4000));
2061 			const UShort4 rV = UShort4(iround(Rv * 0x4000));
2062 			const UShort4 gU = UShort4(iround(-Gu * 0x4000));
2063 			const UShort4 gV = UShort4(iround(-Gv * 0x4000));
2064 			const UShort4 bU = UShort4(iround(Bu * 0x4000));
2065 
2066 			const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
2067 			const UShort4 g0 = UShort4(iround(G0 * 0x4000));
2068 			const UShort4 b0 = UShort4(iround(-B0 * 0x4000));
2069 
2070 			UShort4 y = MulHigh(Y, yY);
2071 			UShort4 r = SubSat(y + MulHigh(V, rV), r0);
2072 			UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
2073 			UShort4 b = SubSat(y + MulHigh(U, bU), b0);
2074 
2075 			c.x = Min(r, UShort4(0x3FFF)) << 2;
2076 			c.y = Min(g, UShort4(0x3FFF)) << 2;
2077 			c.z = Min(b, UShort4(0x3FFF)) << 2;
2078 		}
2079 		else
2080 		{
2081 			return sampleTexel(index, buffer);
2082 		}
2083 
2084 		return c;
2085 	}
2086 
sampleTexel(Int4 & uuuu,Int4 & vvvv,Int4 & wwww,Float4 & z,Pointer<Byte> & mipmap,Pointer<Byte> buffer[4],SamplerFunction function)2087 	Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
2088 	{
2089 		Vector4f c;
2090 
2091 		UInt index[4];
2092 		computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
2093 
2094 		if(hasFloatTexture() || has32bitIntegerTextureComponents())
2095 		{
2096 			int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
2097 			int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
2098 			int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
2099 			int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
2100 
2101 			// Read texels
2102 			switch(textureComponentCount())
2103 			{
2104 			case 4:
2105 				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
2106 				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
2107 				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
2108 				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
2109 				transpose4x4(c.x, c.y, c.z, c.w);
2110 				break;
2111 			case 3:
2112 				ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
2113 				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
2114 				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
2115 				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
2116 				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
2117 				transpose4x3(c.x, c.y, c.z, c.w);
2118 				c.w = Float4(1.0f);
2119 				break;
2120 			case 2:
2121 				// FIXME: Optimal shuffling?
2122 				c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
2123 				c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
2124 				c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
2125 				c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
2126 				c.y = c.x;
2127 				c.x = Float4(c.x.xz, c.z.xz);
2128 				c.y = Float4(c.y.yw, c.z.yw);
2129 				break;
2130 			case 1:
2131 				// FIXME: Optimal shuffling?
2132 				c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
2133 				c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
2134 				c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
2135 				c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
2136 
2137 				if(state.textureFormat == FORMAT_D32FS8_SHADOW && state.textureFilter != FILTER_GATHER)
2138 				{
2139 					Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f));
2140 
2141 					c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f)));   // FIXME: Only less-equal?
2142 				}
2143 				break;
2144 			default:
2145 				ASSERT(false);
2146 			}
2147 		}
2148 		else
2149 		{
2150 			ASSERT(!hasYuvFormat());
2151 
2152 			Vector4s cs = sampleTexel(index, buffer);
2153 
2154 			bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat);
2155 			int componentCount = textureComponentCount();
2156 			for(int n = 0; n < componentCount; ++n)
2157 			{
2158 				if(hasUnsignedTextureComponent(n))
2159 				{
2160 					if(isInteger)
2161 					{
2162 						c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
2163 					}
2164 					else
2165 					{
2166 						c[n] = Float4(As<UShort4>(cs[n]));
2167 					}
2168 				}
2169 				else
2170 				{
2171 					if(isInteger)
2172 					{
2173 						c[n] = As<Float4>(Int4(cs[n]));
2174 					}
2175 					else
2176 					{
2177 						c[n] = Float4(cs[n]);
2178 					}
2179 				}
2180 			}
2181 		}
2182 
2183 		return c;
2184 	}
2185 
selectMipmap(Pointer<Byte> & texture,Pointer<Byte> buffer[4],Pointer<Byte> & mipmap,Float & lod,Int face[4],bool secondLOD)2186 	void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
2187 	{
2188 		if(state.mipmapFilter < MIPMAP_POINT)
2189 		{
2190 			mipmap = texture + OFFSET(Texture,mipmap[0]);
2191 		}
2192 		else
2193 		{
2194 			Int ilod;
2195 
2196 			if(state.mipmapFilter == MIPMAP_POINT)
2197 			{
2198 				ilod = RoundInt(lod);
2199 			}
2200 			else   // Linear
2201 			{
2202 				ilod = Int(lod);
2203 			}
2204 
2205 			mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
2206 		}
2207 
2208 		if(state.textureType != TEXTURE_CUBE)
2209 		{
2210 			buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0]));
2211 
2212 			if(hasYuvFormat())
2213 			{
2214 				buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1]));
2215 				buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2]));
2216 			}
2217 		}
2218 		else
2219 		{
2220 			for(int i = 0; i < 4; i++)
2221 			{
2222 				buffer[i] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
2223 			}
2224 		}
2225 	}
2226 
computeFilterOffset(Float & lod)2227 	Int4 SamplerCore::computeFilterOffset(Float &lod)
2228 	{
2229 		Int4 filtering((state.textureFilter == FILTER_POINT) ? 0 : 1);
2230 		if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
2231 		{
2232 			filtering &= CmpNLE(Float4(lod), Float4(0.0f));
2233 		}
2234 		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
2235 		{
2236 			filtering &= CmpLE(Float4(lod), Float4(0.0f));
2237 		}
2238 
2239 		return filtering;
2240 	}
2241 
address(Float4 & uw,AddressingMode addressingMode,Pointer<Byte> & mipmap)2242 	Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap)
2243 	{
2244 		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
2245 		{
2246 			return Short4();   // Unused
2247 		}
2248 		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY)
2249 		{
2250 			return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1));
2251 		}
2252 		else if(addressingMode == ADDRESSING_CLAMP)
2253 		{
2254 			Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f));
2255 
2256 			return Short4(Int4(clamp * Float4(1 << 16)));
2257 		}
2258 		else if(addressingMode == ADDRESSING_MIRROR)
2259 		{
2260 			Int4 convert = Int4(uw * Float4(1 << 16));
2261 			Int4 mirror = (convert << 15) >> 31;
2262 
2263 			convert ^= mirror;
2264 
2265 			return Short4(convert);
2266 		}
2267 		else if(addressingMode == ADDRESSING_MIRRORONCE)
2268 		{
2269 			// Absolute value
2270 			Int4 convert = Int4(Abs(uw * Float4(1 << 16)));
2271 
2272 			// Clamp
2273 			convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
2274 			convert = As<Int4>(Pack(convert, convert));
2275 
2276 			return As<Short4>(Int2(convert)) + Short4(0x8000u);
2277 		}
2278 		else   // Wrap (or border)
2279 		{
2280 			return Short4(Int4(uw * Float4(1 << 16)));
2281 		}
2282 	}
2283 
address(Float4 & uvw,Int4 & xyz0,Int4 & xyz1,Float4 & f,Pointer<Byte> & mipmap,Float4 & texOffset,Int4 & filter,int whd,AddressingMode addressingMode,SamplerFunction function)2284 	void SamplerCore::address(Float4 &uvw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
2285 	{
2286 		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
2287 		{
2288 			return; // Unused
2289 		}
2290 
2291 		Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16));
2292 		Int4 maxXYZ = dim - Int4(1);
2293 
2294 		if(function == Fetch)
2295 		{
2296 			xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);
2297 		}
2298 		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) // Note: Offset does not apply to array layers
2299 		{
2300 			xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);
2301 		}
2302 		else
2303 		{
2304 			const int halfBits = 0x3effffff; // Value just under 0.5f
2305 			const int oneBits  = 0x3f7fffff; // Value just under 1.0f
2306 			const int twoBits  = 0x3fffffff; // Value just under 2.0f
2307 
2308 			Float4 coord = Float4(dim);
2309 			switch(addressingMode)
2310 			{
2311 			case ADDRESSING_CLAMP:
2312 				{
2313 					Float4 one = As<Float4>(Int4(oneBits));
2314 					coord *= Min(Max(uvw, Float4(0.0f)), one);
2315 				}
2316 				break;
2317 			case ADDRESSING_MIRROR:
2318 				{
2319 					Float4 half = As<Float4>(Int4(halfBits));
2320 					Float4 one = As<Float4>(Int4(oneBits));
2321 					Float4 two = As<Float4>(Int4(twoBits));
2322 					coord *= one - Abs(two * Frac(uvw * half) - one);
2323 				}
2324 				break;
2325 			case ADDRESSING_MIRRORONCE:
2326 				{
2327 					Float4 half = As<Float4>(Int4(halfBits));
2328 					Float4 one = As<Float4>(Int4(oneBits));
2329 					Float4 two = As<Float4>(Int4(twoBits));
2330 					coord *= one - Abs(two * Frac(Min(Max(uvw, -one), two) * half) - one);
2331 				}
2332 				break;
2333 			default:   // Wrap (or border)
2334 				coord *= Frac(uvw);
2335 				break;
2336 			}
2337 
2338 			xyz0 = Int4(coord);
2339 
2340 			if(function.option == Offset)
2341 			{
2342 				xyz0 += As<Int4>(texOffset);
2343 				switch(addressingMode)
2344 				{
2345 				case ADDRESSING_MIRROR:
2346 				case ADDRESSING_MIRRORONCE:
2347 				case ADDRESSING_BORDER:
2348 					// FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp.
2349 				case ADDRESSING_CLAMP:
2350 					xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
2351 					break;
2352 				default:   // Wrap
2353 					xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
2354 					break;
2355 				}
2356 			}
2357 
2358 			if(state.textureFilter != FILTER_POINT) // Compute 2nd coordinate, if needed
2359 			{
2360 				bool gather = state.textureFilter == FILTER_GATHER;
2361 
2362 				xyz1 = xyz0 + filter; // Increment
2363 
2364 				if(!gather)
2365 				{
2366 					Float4 frac = Frac(coord);
2367 					f = Abs(frac - Float4(0.5f));
2368 					xyz1 -= CmpLT(frac, Float4(0.5f)) & (filter + filter); // Decrement xyz if necessary
2369 				}
2370 
2371 				switch(addressingMode)
2372 				{
2373 				case ADDRESSING_MIRROR:
2374 				case ADDRESSING_MIRRORONCE:
2375 				case ADDRESSING_BORDER:
2376 					// FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp.
2377 				case ADDRESSING_CLAMP:
2378 					xyz1 = gather ? Min(xyz1, maxXYZ) : Min(Max(xyz1, Int4(0)), maxXYZ);
2379 					break;
2380 				default:   // Wrap
2381 					{
2382 						// The coordinates overflow or underflow by at most 1
2383 						Int4 over = CmpNLT(xyz1, dim);
2384 						xyz1 = (over & Int4(0)) | (~over & xyz1); // xyz >= dim ? 0 : xyz
2385 						if(!gather)
2386 						{
2387 							Int4 under = CmpLT(xyz1, Int4(0));
2388 							xyz1 = (under & maxXYZ) | (~under & xyz1); // xyz < 0 ? dim - 1 : xyz
2389 						}
2390 					}
2391 					break;
2392 				}
2393 			}
2394 		}
2395 	}
2396 
convertFixed12(Short4 & cs,Float4 & cf)2397 	void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf)
2398 	{
2399 		cs = RoundShort4(cf * Float4(0x1000));
2400 	}
2401 
convertFixed12(Vector4s & cs,Vector4f & cf)2402 	void SamplerCore::convertFixed12(Vector4s &cs, Vector4f &cf)
2403 	{
2404 		convertFixed12(cs.x, cf.x);
2405 		convertFixed12(cs.y, cf.y);
2406 		convertFixed12(cs.z, cf.z);
2407 		convertFixed12(cs.w, cf.w);
2408 	}
2409 
convertSigned12(Float4 & cf,Short4 & cs)2410 	void SamplerCore::convertSigned12(Float4 &cf, Short4 &cs)
2411 	{
2412 		cf = Float4(cs) * Float4(1.0f / 0x0FFE);
2413 	}
2414 
2415 //	void SamplerCore::convertSigned12(Vector4f &cf, Vector4s &cs)
2416 //	{
2417 //		convertSigned12(cf.x, cs.x);
2418 //		convertSigned12(cf.y, cs.y);
2419 //		convertSigned12(cf.z, cs.z);
2420 //		convertSigned12(cf.w, cs.w);
2421 //	}
2422 
convertSigned15(Float4 & cf,Short4 & cs)2423 	void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs)
2424 	{
2425 		cf = Float4(cs) * Float4(1.0f / 0x7FFF);
2426 	}
2427 
convertUnsigned16(Float4 & cf,Short4 & cs)2428 	void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs)
2429 	{
2430 		cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF);
2431 	}
2432 
sRGBtoLinear16_8_12(Short4 & c)2433 	void SamplerCore::sRGBtoLinear16_8_12(Short4 &c)
2434 	{
2435 		c = As<UShort4>(c) >> 8;
2436 
2437 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_12));
2438 
2439 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
2440 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
2441 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
2442 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
2443 	}
2444 
sRGBtoLinear16_6_12(Short4 & c)2445 	void SamplerCore::sRGBtoLinear16_6_12(Short4 &c)
2446 	{
2447 		c = As<UShort4>(c) >> 10;
2448 
2449 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_12));
2450 
2451 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
2452 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
2453 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
2454 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
2455 	}
2456 
sRGBtoLinear16_5_12(Short4 & c)2457 	void SamplerCore::sRGBtoLinear16_5_12(Short4 &c)
2458 	{
2459 		c = As<UShort4>(c) >> 11;
2460 
2461 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_12));
2462 
2463 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
2464 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
2465 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
2466 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
2467 	}
2468 
hasFloatTexture() const2469 	bool SamplerCore::hasFloatTexture() const
2470 	{
2471 		return Surface::isFloatFormat(state.textureFormat);
2472 	}
2473 
hasUnnormalizedIntegerTexture() const2474 	bool SamplerCore::hasUnnormalizedIntegerTexture() const
2475 	{
2476 		return Surface::isNonNormalizedInteger(state.textureFormat);
2477 	}
2478 
hasUnsignedTextureComponent(int component) const2479 	bool SamplerCore::hasUnsignedTextureComponent(int component) const
2480 	{
2481 		return Surface::isUnsignedComponent(state.textureFormat, component);
2482 	}
2483 
textureComponentCount() const2484 	int SamplerCore::textureComponentCount() const
2485 	{
2486 		return Surface::componentCount(state.textureFormat);
2487 	}
2488 
hasThirdCoordinate() const2489 	bool SamplerCore::hasThirdCoordinate() const
2490 	{
2491 		return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY);
2492 	}
2493 
has16bitTextureFormat() const2494 	bool SamplerCore::has16bitTextureFormat() const
2495 	{
2496 		switch(state.textureFormat)
2497 		{
2498 		case FORMAT_R5G6B5:
2499 			return true;
2500 		case FORMAT_R8I_SNORM:
2501 		case FORMAT_G8R8I_SNORM:
2502 		case FORMAT_X8B8G8R8I_SNORM:
2503 		case FORMAT_A8B8G8R8I_SNORM:
2504 		case FORMAT_R8I:
2505 		case FORMAT_R8UI:
2506 		case FORMAT_G8R8I:
2507 		case FORMAT_G8R8UI:
2508 		case FORMAT_X8B8G8R8I:
2509 		case FORMAT_X8B8G8R8UI:
2510 		case FORMAT_A8B8G8R8I:
2511 		case FORMAT_A8B8G8R8UI:
2512 		case FORMAT_R32I:
2513 		case FORMAT_R32UI:
2514 		case FORMAT_G32R32I:
2515 		case FORMAT_G32R32UI:
2516 		case FORMAT_X32B32G32R32I:
2517 		case FORMAT_X32B32G32R32UI:
2518 		case FORMAT_A32B32G32R32I:
2519 		case FORMAT_A32B32G32R32UI:
2520 		case FORMAT_G8R8:
2521 		case FORMAT_X8R8G8B8:
2522 		case FORMAT_X8B8G8R8:
2523 		case FORMAT_A8R8G8B8:
2524 		case FORMAT_A8B8G8R8:
2525 		case FORMAT_SRGB8_X8:
2526 		case FORMAT_SRGB8_A8:
2527 		case FORMAT_V8U8:
2528 		case FORMAT_Q8W8V8U8:
2529 		case FORMAT_X8L8V8U8:
2530 		case FORMAT_R32F:
2531 		case FORMAT_G32R32F:
2532 		case FORMAT_X32B32G32R32F:
2533 		case FORMAT_A32B32G32R32F:
2534 		case FORMAT_A8:
2535 		case FORMAT_R8:
2536 		case FORMAT_L8:
2537 		case FORMAT_A8L8:
2538 		case FORMAT_D32F:
2539 		case FORMAT_D32F_LOCKABLE:
2540 		case FORMAT_D32FS8_TEXTURE:
2541 		case FORMAT_D32FS8_SHADOW:
2542 		case FORMAT_L16:
2543 		case FORMAT_G16R16:
2544 		case FORMAT_A16B16G16R16:
2545 		case FORMAT_V16U16:
2546 		case FORMAT_A16W16V16U16:
2547 		case FORMAT_Q16W16V16U16:
2548 		case FORMAT_R16I:
2549 		case FORMAT_R16UI:
2550 		case FORMAT_G16R16I:
2551 		case FORMAT_G16R16UI:
2552 		case FORMAT_X16B16G16R16I:
2553 		case FORMAT_X16B16G16R16UI:
2554 		case FORMAT_A16B16G16R16I:
2555 		case FORMAT_A16B16G16R16UI:
2556 		case FORMAT_YV12_BT601:
2557 		case FORMAT_YV12_BT709:
2558 		case FORMAT_YV12_JFIF:
2559 			return false;
2560 		default:
2561 			ASSERT(false);
2562 		}
2563 
2564 		return false;
2565 	}
2566 
has8bitTextureComponents() const2567 	bool SamplerCore::has8bitTextureComponents() const
2568 	{
2569 		switch(state.textureFormat)
2570 		{
2571 		case FORMAT_G8R8:
2572 		case FORMAT_X8R8G8B8:
2573 		case FORMAT_X8B8G8R8:
2574 		case FORMAT_A8R8G8B8:
2575 		case FORMAT_A8B8G8R8:
2576 		case FORMAT_SRGB8_X8:
2577 		case FORMAT_SRGB8_A8:
2578 		case FORMAT_V8U8:
2579 		case FORMAT_Q8W8V8U8:
2580 		case FORMAT_X8L8V8U8:
2581 		case FORMAT_A8:
2582 		case FORMAT_R8:
2583 		case FORMAT_L8:
2584 		case FORMAT_A8L8:
2585 		case FORMAT_R8I_SNORM:
2586 		case FORMAT_G8R8I_SNORM:
2587 		case FORMAT_X8B8G8R8I_SNORM:
2588 		case FORMAT_A8B8G8R8I_SNORM:
2589 		case FORMAT_R8I:
2590 		case FORMAT_R8UI:
2591 		case FORMAT_G8R8I:
2592 		case FORMAT_G8R8UI:
2593 		case FORMAT_X8B8G8R8I:
2594 		case FORMAT_X8B8G8R8UI:
2595 		case FORMAT_A8B8G8R8I:
2596 		case FORMAT_A8B8G8R8UI:
2597 			return true;
2598 		case FORMAT_R5G6B5:
2599 		case FORMAT_R32F:
2600 		case FORMAT_G32R32F:
2601 		case FORMAT_X32B32G32R32F:
2602 		case FORMAT_A32B32G32R32F:
2603 		case FORMAT_D32F:
2604 		case FORMAT_D32F_LOCKABLE:
2605 		case FORMAT_D32FS8_TEXTURE:
2606 		case FORMAT_D32FS8_SHADOW:
2607 		case FORMAT_L16:
2608 		case FORMAT_G16R16:
2609 		case FORMAT_A16B16G16R16:
2610 		case FORMAT_V16U16:
2611 		case FORMAT_A16W16V16U16:
2612 		case FORMAT_Q16W16V16U16:
2613 		case FORMAT_R32I:
2614 		case FORMAT_R32UI:
2615 		case FORMAT_G32R32I:
2616 		case FORMAT_G32R32UI:
2617 		case FORMAT_X32B32G32R32I:
2618 		case FORMAT_X32B32G32R32UI:
2619 		case FORMAT_A32B32G32R32I:
2620 		case FORMAT_A32B32G32R32UI:
2621 		case FORMAT_R16I:
2622 		case FORMAT_R16UI:
2623 		case FORMAT_G16R16I:
2624 		case FORMAT_G16R16UI:
2625 		case FORMAT_X16B16G16R16I:
2626 		case FORMAT_X16B16G16R16UI:
2627 		case FORMAT_A16B16G16R16I:
2628 		case FORMAT_A16B16G16R16UI:
2629 		case FORMAT_YV12_BT601:
2630 		case FORMAT_YV12_BT709:
2631 		case FORMAT_YV12_JFIF:
2632 			return false;
2633 		default:
2634 			ASSERT(false);
2635 		}
2636 
2637 		return false;
2638 	}
2639 
has16bitTextureComponents() const2640 	bool SamplerCore::has16bitTextureComponents() const
2641 	{
2642 		switch(state.textureFormat)
2643 		{
2644 		case FORMAT_R5G6B5:
2645 		case FORMAT_R8I_SNORM:
2646 		case FORMAT_G8R8I_SNORM:
2647 		case FORMAT_X8B8G8R8I_SNORM:
2648 		case FORMAT_A8B8G8R8I_SNORM:
2649 		case FORMAT_R8I:
2650 		case FORMAT_R8UI:
2651 		case FORMAT_G8R8I:
2652 		case FORMAT_G8R8UI:
2653 		case FORMAT_X8B8G8R8I:
2654 		case FORMAT_X8B8G8R8UI:
2655 		case FORMAT_A8B8G8R8I:
2656 		case FORMAT_A8B8G8R8UI:
2657 		case FORMAT_R32I:
2658 		case FORMAT_R32UI:
2659 		case FORMAT_G32R32I:
2660 		case FORMAT_G32R32UI:
2661 		case FORMAT_X32B32G32R32I:
2662 		case FORMAT_X32B32G32R32UI:
2663 		case FORMAT_A32B32G32R32I:
2664 		case FORMAT_A32B32G32R32UI:
2665 		case FORMAT_G8R8:
2666 		case FORMAT_X8R8G8B8:
2667 		case FORMAT_X8B8G8R8:
2668 		case FORMAT_A8R8G8B8:
2669 		case FORMAT_A8B8G8R8:
2670 		case FORMAT_SRGB8_X8:
2671 		case FORMAT_SRGB8_A8:
2672 		case FORMAT_V8U8:
2673 		case FORMAT_Q8W8V8U8:
2674 		case FORMAT_X8L8V8U8:
2675 		case FORMAT_R32F:
2676 		case FORMAT_G32R32F:
2677 		case FORMAT_X32B32G32R32F:
2678 		case FORMAT_A32B32G32R32F:
2679 		case FORMAT_A8:
2680 		case FORMAT_R8:
2681 		case FORMAT_L8:
2682 		case FORMAT_A8L8:
2683 		case FORMAT_D32F:
2684 		case FORMAT_D32F_LOCKABLE:
2685 		case FORMAT_D32FS8_TEXTURE:
2686 		case FORMAT_D32FS8_SHADOW:
2687 		case FORMAT_YV12_BT601:
2688 		case FORMAT_YV12_BT709:
2689 		case FORMAT_YV12_JFIF:
2690 			return false;
2691 		case FORMAT_L16:
2692 		case FORMAT_G16R16:
2693 		case FORMAT_A16B16G16R16:
2694 		case FORMAT_R16I:
2695 		case FORMAT_R16UI:
2696 		case FORMAT_G16R16I:
2697 		case FORMAT_G16R16UI:
2698 		case FORMAT_X16B16G16R16I:
2699 		case FORMAT_X16B16G16R16UI:
2700 		case FORMAT_A16B16G16R16I:
2701 		case FORMAT_A16B16G16R16UI:
2702 		case FORMAT_V16U16:
2703 		case FORMAT_A16W16V16U16:
2704 		case FORMAT_Q16W16V16U16:
2705 			return true;
2706 		default:
2707 			ASSERT(false);
2708 		}
2709 
2710 		return false;
2711 	}
2712 
has32bitIntegerTextureComponents() const2713 	bool SamplerCore::has32bitIntegerTextureComponents() const
2714 	{
2715 		switch(state.textureFormat)
2716 		{
2717 		case FORMAT_R5G6B5:
2718 		case FORMAT_R8I_SNORM:
2719 		case FORMAT_G8R8I_SNORM:
2720 		case FORMAT_X8B8G8R8I_SNORM:
2721 		case FORMAT_A8B8G8R8I_SNORM:
2722 		case FORMAT_R8I:
2723 		case FORMAT_R8UI:
2724 		case FORMAT_G8R8I:
2725 		case FORMAT_G8R8UI:
2726 		case FORMAT_X8B8G8R8I:
2727 		case FORMAT_X8B8G8R8UI:
2728 		case FORMAT_A8B8G8R8I:
2729 		case FORMAT_A8B8G8R8UI:
2730 		case FORMAT_G8R8:
2731 		case FORMAT_X8R8G8B8:
2732 		case FORMAT_X8B8G8R8:
2733 		case FORMAT_A8R8G8B8:
2734 		case FORMAT_A8B8G8R8:
2735 		case FORMAT_SRGB8_X8:
2736 		case FORMAT_SRGB8_A8:
2737 		case FORMAT_V8U8:
2738 		case FORMAT_Q8W8V8U8:
2739 		case FORMAT_X8L8V8U8:
2740 		case FORMAT_L16:
2741 		case FORMAT_G16R16:
2742 		case FORMAT_A16B16G16R16:
2743 		case FORMAT_R16I:
2744 		case FORMAT_R16UI:
2745 		case FORMAT_G16R16I:
2746 		case FORMAT_G16R16UI:
2747 		case FORMAT_X16B16G16R16I:
2748 		case FORMAT_X16B16G16R16UI:
2749 		case FORMAT_A16B16G16R16I:
2750 		case FORMAT_A16B16G16R16UI:
2751 		case FORMAT_V16U16:
2752 		case FORMAT_A16W16V16U16:
2753 		case FORMAT_Q16W16V16U16:
2754 		case FORMAT_R32F:
2755 		case FORMAT_G32R32F:
2756 		case FORMAT_X32B32G32R32F:
2757 		case FORMAT_A32B32G32R32F:
2758 		case FORMAT_A8:
2759 		case FORMAT_R8:
2760 		case FORMAT_L8:
2761 		case FORMAT_A8L8:
2762 		case FORMAT_D32F:
2763 		case FORMAT_D32F_LOCKABLE:
2764 		case FORMAT_D32FS8_TEXTURE:
2765 		case FORMAT_D32FS8_SHADOW:
2766 		case FORMAT_YV12_BT601:
2767 		case FORMAT_YV12_BT709:
2768 		case FORMAT_YV12_JFIF:
2769 			return false;
2770 		case FORMAT_R32I:
2771 		case FORMAT_R32UI:
2772 		case FORMAT_G32R32I:
2773 		case FORMAT_G32R32UI:
2774 		case FORMAT_X32B32G32R32I:
2775 		case FORMAT_X32B32G32R32UI:
2776 		case FORMAT_A32B32G32R32I:
2777 		case FORMAT_A32B32G32R32UI:
2778 			return true;
2779 		default:
2780 			ASSERT(false);
2781 		}
2782 
2783 		return false;
2784 	}
2785 
hasYuvFormat() const2786 	bool SamplerCore::hasYuvFormat() const
2787 	{
2788 		switch(state.textureFormat)
2789 		{
2790 		case FORMAT_YV12_BT601:
2791 		case FORMAT_YV12_BT709:
2792 		case FORMAT_YV12_JFIF:
2793 			return true;
2794 		case FORMAT_R5G6B5:
2795 		case FORMAT_R8I_SNORM:
2796 		case FORMAT_G8R8I_SNORM:
2797 		case FORMAT_X8B8G8R8I_SNORM:
2798 		case FORMAT_A8B8G8R8I_SNORM:
2799 		case FORMAT_R8I:
2800 		case FORMAT_R8UI:
2801 		case FORMAT_G8R8I:
2802 		case FORMAT_G8R8UI:
2803 		case FORMAT_X8B8G8R8I:
2804 		case FORMAT_X8B8G8R8UI:
2805 		case FORMAT_A8B8G8R8I:
2806 		case FORMAT_A8B8G8R8UI:
2807 		case FORMAT_R32I:
2808 		case FORMAT_R32UI:
2809 		case FORMAT_G32R32I:
2810 		case FORMAT_G32R32UI:
2811 		case FORMAT_X32B32G32R32I:
2812 		case FORMAT_X32B32G32R32UI:
2813 		case FORMAT_A32B32G32R32I:
2814 		case FORMAT_A32B32G32R32UI:
2815 		case FORMAT_G8R8:
2816 		case FORMAT_X8R8G8B8:
2817 		case FORMAT_X8B8G8R8:
2818 		case FORMAT_A8R8G8B8:
2819 		case FORMAT_A8B8G8R8:
2820 		case FORMAT_SRGB8_X8:
2821 		case FORMAT_SRGB8_A8:
2822 		case FORMAT_V8U8:
2823 		case FORMAT_Q8W8V8U8:
2824 		case FORMAT_X8L8V8U8:
2825 		case FORMAT_R32F:
2826 		case FORMAT_G32R32F:
2827 		case FORMAT_X32B32G32R32F:
2828 		case FORMAT_A32B32G32R32F:
2829 		case FORMAT_A8:
2830 		case FORMAT_R8:
2831 		case FORMAT_L8:
2832 		case FORMAT_A8L8:
2833 		case FORMAT_D32F:
2834 		case FORMAT_D32F_LOCKABLE:
2835 		case FORMAT_D32FS8_TEXTURE:
2836 		case FORMAT_D32FS8_SHADOW:
2837 		case FORMAT_L16:
2838 		case FORMAT_G16R16:
2839 		case FORMAT_A16B16G16R16:
2840 		case FORMAT_R16I:
2841 		case FORMAT_R16UI:
2842 		case FORMAT_G16R16I:
2843 		case FORMAT_G16R16UI:
2844 		case FORMAT_X16B16G16R16I:
2845 		case FORMAT_X16B16G16R16UI:
2846 		case FORMAT_A16B16G16R16I:
2847 		case FORMAT_A16B16G16R16UI:
2848 		case FORMAT_V16U16:
2849 		case FORMAT_A16W16V16U16:
2850 		case FORMAT_Q16W16V16U16:
2851 			return false;
2852 		default:
2853 			ASSERT(false);
2854 		}
2855 
2856 		return false;
2857 	}
2858 
isRGBComponent(int component) const2859 	bool SamplerCore::isRGBComponent(int component) const
2860 	{
2861 		switch(state.textureFormat)
2862 		{
2863 		case FORMAT_R5G6B5:         return component < 3;
2864 		case FORMAT_R8I_SNORM:      return component < 1;
2865 		case FORMAT_G8R8I_SNORM:    return component < 2;
2866 		case FORMAT_X8B8G8R8I_SNORM: return component < 3;
2867 		case FORMAT_A8B8G8R8I_SNORM: return component < 3;
2868 		case FORMAT_R8I:            return component < 1;
2869 		case FORMAT_R8UI:           return component < 1;
2870 		case FORMAT_G8R8I:          return component < 2;
2871 		case FORMAT_G8R8UI:         return component < 2;
2872 		case FORMAT_X8B8G8R8I:      return component < 3;
2873 		case FORMAT_X8B8G8R8UI:     return component < 3;
2874 		case FORMAT_A8B8G8R8I:      return component < 3;
2875 		case FORMAT_A8B8G8R8UI:     return component < 3;
2876 		case FORMAT_R32I:           return component < 1;
2877 		case FORMAT_R32UI:          return component < 1;
2878 		case FORMAT_G32R32I:        return component < 2;
2879 		case FORMAT_G32R32UI:       return component < 2;
2880 		case FORMAT_X32B32G32R32I:  return component < 3;
2881 		case FORMAT_X32B32G32R32UI: return component < 3;
2882 		case FORMAT_A32B32G32R32I:  return component < 3;
2883 		case FORMAT_A32B32G32R32UI: return component < 3;
2884 		case FORMAT_G8R8:           return component < 2;
2885 		case FORMAT_X8R8G8B8:       return component < 3;
2886 		case FORMAT_X8B8G8R8:       return component < 3;
2887 		case FORMAT_A8R8G8B8:       return component < 3;
2888 		case FORMAT_A8B8G8R8:       return component < 3;
2889 		case FORMAT_SRGB8_X8:       return component < 3;
2890 		case FORMAT_SRGB8_A8:       return component < 3;
2891 		case FORMAT_V8U8:           return false;
2892 		case FORMAT_Q8W8V8U8:       return false;
2893 		case FORMAT_X8L8V8U8:       return false;
2894 		case FORMAT_R32F:           return component < 1;
2895 		case FORMAT_G32R32F:        return component < 2;
2896 		case FORMAT_X32B32G32R32F:  return component < 3;
2897 		case FORMAT_A32B32G32R32F:  return component < 3;
2898 		case FORMAT_A8:             return false;
2899 		case FORMAT_R8:             return component < 1;
2900 		case FORMAT_L8:             return component < 1;
2901 		case FORMAT_A8L8:           return component < 1;
2902 		case FORMAT_D32F:           return false;
2903 		case FORMAT_D32F_LOCKABLE:  return false;
2904 		case FORMAT_D32FS8_TEXTURE: return false;
2905 		case FORMAT_D32FS8_SHADOW:  return false;
2906 		case FORMAT_L16:            return component < 1;
2907 		case FORMAT_G16R16:         return component < 2;
2908 		case FORMAT_A16B16G16R16:   return component < 3;
2909 		case FORMAT_R16I:           return component < 1;
2910 		case FORMAT_R16UI:          return component < 1;
2911 		case FORMAT_G16R16I:        return component < 2;
2912 		case FORMAT_G16R16UI:       return component < 2;
2913 		case FORMAT_X16B16G16R16I:  return component < 3;
2914 		case FORMAT_X16B16G16R16UI: return component < 3;
2915 		case FORMAT_A16B16G16R16I:  return component < 3;
2916 		case FORMAT_A16B16G16R16UI: return component < 3;
2917 		case FORMAT_V16U16:         return false;
2918 		case FORMAT_A16W16V16U16:   return false;
2919 		case FORMAT_Q16W16V16U16:   return false;
2920 		case FORMAT_YV12_BT601:     return component < 3;
2921 		case FORMAT_YV12_BT709:     return component < 3;
2922 		case FORMAT_YV12_JFIF:      return component < 3;
2923 		default:
2924 			ASSERT(false);
2925 		}
2926 
2927 		return false;
2928 	}
2929 }
2930