• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Blitter.hpp"
16 
17 #include "Reactor/Reactor.hpp"
18 #include "Common/Memory.hpp"
19 #include "Common/Debug.hpp"
20 
21 namespace sw
22 {
Blitter()23 	Blitter::Blitter()
24 	{
25 		blitCache = new RoutineCache<BlitState>(1024);
26 	}
27 
~Blitter()28 	Blitter::~Blitter()
29 	{
30 		delete blitCache;
31 	}
32 
clear(void * pixel,sw::Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)33 	void Blitter::clear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
34 	{
35 		if(fastClear(pixel, format, dest, dRect, rgbaMask))
36 		{
37 			return;
38 		}
39 
40 		sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format));
41 		Blitter::Options clearOptions = static_cast<sw::Blitter::Options>((rgbaMask & 0xF) | CLEAR_OPERATION);
42 		SliceRect sRect(dRect);
43 		sRect.slice = 0;
44 		blit(color, sRect, dest, dRect, clearOptions);
45 		delete color;
46 	}
47 
fastClear(void * pixel,sw::Format format,Surface * dest,const SliceRect & dRect,unsigned int rgbaMask)48 	bool Blitter::fastClear(void* pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask)
49 	{
50 		if(format != FORMAT_A32B32G32R32F)
51 		{
52 			return false;
53 		}
54 
55 		float *color = (float*)pixel;
56 		float r = color[0];
57 		float g = color[1];
58 		float b = color[2];
59 		float a = color[3];
60 
61 		uint32_t packed;
62 
63 		switch(dest->getFormat())
64 		{
65 		case FORMAT_R5G6B5:
66 			if((rgbaMask & 0x7) != 0x7) return false;
67 			packed = ((uint16_t)(31 * b + 0.5f) << 0) |
68 			         ((uint16_t)(63 * g + 0.5f) << 5) |
69 			         ((uint16_t)(31 * r + 0.5f) << 11);
70 			break;
71 		case FORMAT_X8B8G8R8:
72 			if((rgbaMask & 0x7) != 0x7) return false;
73 			packed = ((uint32_t)(255) << 24) |
74 			         ((uint32_t)(255 * b + 0.5f) << 16) |
75 			         ((uint32_t)(255 * g + 0.5f) << 8) |
76 			         ((uint32_t)(255 * r + 0.5f) << 0);
77 			break;
78 		case FORMAT_A8B8G8R8:
79 			if((rgbaMask & 0xF) != 0xF) return false;
80 			packed = ((uint32_t)(255 * a + 0.5f) << 24) |
81 			         ((uint32_t)(255 * b + 0.5f) << 16) |
82 			         ((uint32_t)(255 * g + 0.5f) << 8) |
83 			         ((uint32_t)(255 * r + 0.5f) << 0);
84 			break;
85 		case FORMAT_X8R8G8B8:
86 			if((rgbaMask & 0x7) != 0x7) return false;
87 			packed = ((uint32_t)(255) << 24) |
88 			         ((uint32_t)(255 * r + 0.5f) << 16) |
89 			         ((uint32_t)(255 * g + 0.5f) << 8) |
90 			         ((uint32_t)(255 * b + 0.5f) << 0);
91 			break;
92 		case FORMAT_A8R8G8B8:
93 			if((rgbaMask & 0xF) != 0xF) return false;
94 			packed = ((uint32_t)(255 * a + 0.5f) << 24) |
95 			         ((uint32_t)(255 * r + 0.5f) << 16) |
96 			         ((uint32_t)(255 * g + 0.5f) << 8) |
97 			         ((uint32_t)(255 * b + 0.5f) << 0);
98 			break;
99 		default:
100 			return false;
101 		}
102 
103 		uint8_t *d = (uint8_t*)dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
104 
105 		switch(Surface::bytes(dest->getFormat()))
106 		{
107 		case 2:
108 			for(int i = dRect.y0; i < dRect.y1; i++)
109 			{
110 				sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0);
111 				d += dest->getInternalPitchB();
112 			}
113 			break;
114 		case 4:
115 			for(int i = dRect.y0; i < dRect.y1; i++)
116 			{
117 				sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0);
118 				d += dest->getInternalPitchB();
119 			}
120 			break;
121 		default:
122 			assert(false);
123 		}
124 
125 		dest->unlockInternal();
126 
127 		return true;
128 	}
129 
blit(Surface * source,const SliceRect & sRect,Surface * dest,const SliceRect & dRect,bool filter,bool isStencil)130 	void Blitter::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
131 	{
132 		Blitter::Options options = WRITE_RGBA;
133 		if(filter)
134 		{
135 			options = static_cast<Blitter::Options>(options | FILTER_LINEAR);
136 		}
137 		if(isStencil)
138 		{
139 			options = static_cast<Blitter::Options>(options | USE_STENCIL);
140 		}
141 		blit(source, sRect, dest, dRect, options);
142 	}
143 
blit(Surface * source,const SliceRect & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)144 	void Blitter::blit(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
145 	{
146 		if(dest->getInternalFormat() == FORMAT_NULL)
147 		{
148 			return;
149 		}
150 
151 		if(blitReactor(source, sourceRect, dest, destRect, options))
152 		{
153 			return;
154 		}
155 
156 		SliceRect sRect = sourceRect;
157 		SliceRect dRect = destRect;
158 
159 		bool flipX = destRect.x0 > destRect.x1;
160 		bool flipY = destRect.y0 > destRect.y1;
161 
162 		if(flipX)
163 		{
164 			swap(dRect.x0, dRect.x1);
165 			swap(sRect.x0, sRect.x1);
166 		}
167 		if(flipY)
168 		{
169 			swap(dRect.y0, dRect.y1);
170 			swap(sRect.y0, sRect.y1);
171 		}
172 
173 		source->lockInternal(sRect.x0, sRect.y0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC);
174 		dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC);
175 
176 		float w = static_cast<float>(sRect.x1 - sRect.x0) / static_cast<float>(dRect.x1 - dRect.x0);
177 		float h = static_cast<float>(sRect.y1 - sRect.y0) / static_cast<float>(dRect.y1 - dRect.y0);
178 
179 		const float xStart = (float)sRect.x0 + 0.5f * w;
180 		float y = (float)sRect.y0 + 0.5f * h;
181 
182 		for(int j = dRect.y0; j < dRect.y1; j++)
183 		{
184 			float x = xStart;
185 
186 			for(int i = dRect.x0; i < dRect.x1; i++)
187 			{
188 				// FIXME: Support RGBA mask
189 				dest->copyInternal(source, i, j, x, y, (options & FILTER_LINEAR) == FILTER_LINEAR);
190 
191 				x += w;
192 			}
193 
194 			y += h;
195 		}
196 
197 		source->unlockInternal();
198 		dest->unlockInternal();
199 	}
200 
blit3D(Surface * source,Surface * dest)201 	void Blitter::blit3D(Surface *source, Surface *dest)
202 	{
203 		source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC);
204 		dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC);
205 
206 		float w = static_cast<float>(source->getWidth())  / static_cast<float>(dest->getWidth());
207 		float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight());
208 		float d = static_cast<float>(source->getDepth())  / static_cast<float>(dest->getDepth());
209 
210 		float z = 0.5f * d;
211 		for(int k = 0; k < dest->getDepth(); ++k)
212 		{
213 			float y = 0.5f * h;
214 			for(int j = 0; j < dest->getHeight(); ++j)
215 			{
216 				float x = 0.5f * w;
217 				for(int i = 0; i < dest->getWidth(); ++i)
218 				{
219 					dest->copyInternal(source, i, j, k, x, y, z, true);
220 					x += w;
221 				}
222 				y += h;
223 			}
224 			z += d;
225 		}
226 
227 		source->unlockInternal();
228 		dest->unlockInternal();
229 	}
230 
read(Float4 & c,Pointer<Byte> element,Format format)231 	bool Blitter::read(Float4 &c, Pointer<Byte> element, Format format)
232 	{
233 		c = Float4(0.0f, 0.0f, 0.0f, 1.0f);
234 
235 		switch(format)
236 		{
237 		case FORMAT_L8:
238 			c.xyz = Float(Int(*Pointer<Byte>(element)));
239 			c.w = float(0xFF);
240 			break;
241 		case FORMAT_A8:
242 			c.w = Float(Int(*Pointer<Byte>(element)));
243 			break;
244 		case FORMAT_R8I:
245 		case FORMAT_R8I_SNORM:
246 			c.x = Float(Int(*Pointer<SByte>(element)));
247 			c.w = float(0x7F);
248 			break;
249 		case FORMAT_R8:
250 		case FORMAT_R8UI:
251 			c.x = Float(Int(*Pointer<Byte>(element)));
252 			c.w = float(0xFF);
253 			break;
254 		case FORMAT_R16I:
255 			c.x = Float(Int(*Pointer<Short>(element)));
256 			c.w = float(0x7FFF);
257 			break;
258 		case FORMAT_R16UI:
259 			c.x = Float(Int(*Pointer<UShort>(element)));
260 			c.w = float(0xFFFF);
261 			break;
262 		case FORMAT_R32I:
263 			c.x = Float(*Pointer<Int>(element));
264 			c.w = float(0x7FFFFFFF);
265 			break;
266 		case FORMAT_R32UI:
267 			c.x = Float(*Pointer<UInt>(element));
268 			c.w = float(0xFFFFFFFF);
269 			break;
270 		case FORMAT_A8R8G8B8:
271 			c = Float4(*Pointer<Byte4>(element)).zyxw;
272 			break;
273 		case FORMAT_A8B8G8R8I:
274 		case FORMAT_A8B8G8R8I_SNORM:
275 			c = Float4(*Pointer<SByte4>(element));
276 			break;
277 		case FORMAT_A8B8G8R8:
278 		case FORMAT_A8B8G8R8UI:
279 		case FORMAT_SRGB8_A8:
280 			c = Float4(*Pointer<Byte4>(element));
281 			break;
282 		case FORMAT_X8R8G8B8:
283 			c = Float4(*Pointer<Byte4>(element)).zyxw;
284 			c.w = float(0xFF);
285 			break;
286 		case FORMAT_R8G8B8:
287 			c.z = Float(Int(*Pointer<Byte>(element + 0)));
288 			c.y = Float(Int(*Pointer<Byte>(element + 1)));
289 			c.x = Float(Int(*Pointer<Byte>(element + 2)));
290 			c.w = float(0xFF);
291 			break;
292 		case FORMAT_B8G8R8:
293 			c.x = Float(Int(*Pointer<Byte>(element + 0)));
294 			c.y = Float(Int(*Pointer<Byte>(element + 1)));
295 			c.z = Float(Int(*Pointer<Byte>(element + 2)));
296 			c.w = float(0xFF);
297 			break;
298 		case FORMAT_X8B8G8R8I:
299 		case FORMAT_X8B8G8R8I_SNORM:
300 			c = Float4(*Pointer<SByte4>(element));
301 			c.w = float(0x7F);
302 			break;
303 		case FORMAT_X8B8G8R8:
304 		case FORMAT_X8B8G8R8UI:
305 		case FORMAT_SRGB8_X8:
306 			c = Float4(*Pointer<Byte4>(element));
307 			c.w = float(0xFF);
308 			break;
309 		case FORMAT_A16B16G16R16I:
310 			c = Float4(*Pointer<Short4>(element));
311 			break;
312 		case FORMAT_A16B16G16R16:
313 		case FORMAT_A16B16G16R16UI:
314 			c = Float4(*Pointer<UShort4>(element));
315 			break;
316 		case FORMAT_X16B16G16R16I:
317 			c = Float4(*Pointer<Short4>(element));
318 			c.w = float(0x7FFF);
319 			break;
320 		case FORMAT_X16B16G16R16UI:
321 			c = Float4(*Pointer<UShort4>(element));
322 			c.w = float(0xFFFF);
323 			break;
324 		case FORMAT_A32B32G32R32I:
325 			c = Float4(*Pointer<Int4>(element));
326 			break;
327 		case FORMAT_A32B32G32R32UI:
328 			c = Float4(*Pointer<UInt4>(element));
329 			break;
330 		case FORMAT_X32B32G32R32I:
331 			c = Float4(*Pointer<Int4>(element));
332 			c.w = float(0x7FFFFFFF);
333 			break;
334 		case FORMAT_X32B32G32R32UI:
335 			c = Float4(*Pointer<UInt4>(element));
336 			c.w = float(0xFFFFFFFF);
337 			break;
338 		case FORMAT_G8R8I:
339 		case FORMAT_G8R8I_SNORM:
340 			c.x = Float(Int(*Pointer<SByte>(element + 0)));
341 			c.y = Float(Int(*Pointer<SByte>(element + 1)));
342 			c.w = float(0x7F);
343 			break;
344 		case FORMAT_G8R8:
345 		case FORMAT_G8R8UI:
346 			c.x = Float(Int(*Pointer<Byte>(element + 0)));
347 			c.y = Float(Int(*Pointer<Byte>(element + 1)));
348 			c.w = float(0xFF);
349 			break;
350 		case FORMAT_G16R16I:
351 			c.x = Float(Int(*Pointer<Short>(element + 0)));
352 			c.y = Float(Int(*Pointer<Short>(element + 2)));
353 			c.w = float(0x7FFF);
354 			break;
355 		case FORMAT_G16R16:
356 		case FORMAT_G16R16UI:
357 			c.x = Float(Int(*Pointer<UShort>(element + 0)));
358 			c.y = Float(Int(*Pointer<UShort>(element + 2)));
359 			c.w = float(0xFFFF);
360 			break;
361 		case FORMAT_G32R32I:
362 			c.x = Float(*Pointer<Int>(element + 0));
363 			c.y = Float(*Pointer<Int>(element + 4));
364 			c.w = float(0x7FFFFFFF);
365 			break;
366 		case FORMAT_G32R32UI:
367 			c.x = Float(*Pointer<UInt>(element + 0));
368 			c.y = Float(*Pointer<UInt>(element + 4));
369 			c.w = float(0xFFFFFFFF);
370 			break;
371 		case FORMAT_A32B32G32R32F:
372 			c = *Pointer<Float4>(element);
373 			break;
374 		case FORMAT_X32B32G32R32F:
375 		case FORMAT_B32G32R32F:
376 			c.z = *Pointer<Float>(element + 8);
377 		case FORMAT_G32R32F:
378 			c.x = *Pointer<Float>(element + 0);
379 			c.y = *Pointer<Float>(element + 4);
380 			break;
381 		case FORMAT_R32F:
382 			c.x = *Pointer<Float>(element);
383 			break;
384 		case FORMAT_R5G6B5:
385 			c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11)));
386 			c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5)));
387 			c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F)));
388 			break;
389 		case FORMAT_A2B10G10R10:
390 			c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF))));
391 			c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10));
392 			c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20));
393 			c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30));
394 			break;
395 		case FORMAT_D16:
396 			c.x = Float(Int((*Pointer<UShort>(element))));
397 			break;
398 		case FORMAT_D24S8:
399 			c.x = Float(Int((*Pointer<UInt>(element))));
400 			break;
401 		case FORMAT_D32:
402 			c.x = Float(Int((*Pointer<UInt>(element))));
403 			break;
404 		case FORMAT_D32F:
405 			c.x = *Pointer<Float>(element);
406 			break;
407 		case FORMAT_D32F_COMPLEMENTARY:
408 			c.x = 1.0f - *Pointer<Float>(element);
409 			break;
410 		case FORMAT_D32F_LOCKABLE:
411 			c.x = *Pointer<Float>(element);
412 			break;
413 		case FORMAT_D32FS8_TEXTURE:
414 			c.x = *Pointer<Float>(element);
415 			break;
416 		case FORMAT_D32FS8_SHADOW:
417 			c.x = *Pointer<Float>(element);
418 			break;
419 		case FORMAT_S8:
420 			c.x = Float(Int(*Pointer<Byte>(element)));
421 			break;
422 		default:
423 			return false;
424 		}
425 
426 		return true;
427 	}
428 
write(Float4 & c,Pointer<Byte> element,Format format,const Blitter::Options & options)429 	bool Blitter::write(Float4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
430 	{
431 		bool writeR = (options & WRITE_RED) == WRITE_RED;
432 		bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
433 		bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
434 		bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
435 		bool writeRGBA = writeR && writeG && writeB && writeA;
436 
437 		switch(format)
438 		{
439 		case FORMAT_L8:
440 			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
441 			break;
442 		case FORMAT_A8:
443 			if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); }
444 			break;
445 		case FORMAT_A8R8G8B8:
446 			if(writeRGBA)
447 			{
448 				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw));
449 				*Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
450 			}
451 			else
452 			{
453 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
454 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
455 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
456 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
457 			}
458 			break;
459 		case FORMAT_A8B8G8R8:
460 		case FORMAT_SRGB8_A8:
461 			if(writeRGBA)
462 			{
463 				UShort4 c0 = As<UShort4>(RoundShort4(c));
464 				*Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
465 			}
466 			else
467 			{
468 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
469 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
470 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
471 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
472 			}
473 			break;
474 		case FORMAT_X8R8G8B8:
475 			if(writeRGBA)
476 			{
477 				UShort4 c0 = As<UShort4>(RoundShort4(c.zyxw)) | UShort4(0x0000, 0x0000, 0x0000, 0xFFFFu);
478 				*Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
479 			}
480 			else
481 			{
482 				if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
483 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
484 				if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
485 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
486 			}
487 			break;
488 		case FORMAT_X8B8G8R8:
489 		case FORMAT_SRGB8_X8:
490 			if(writeRGBA)
491 			{
492 				UShort4 c0 = As<UShort4>(RoundShort4(c)) | UShort4(0x0000, 0x0000, 0x0000, 0xFFFFu);
493 				*Pointer<Byte4>(element) = Byte4(Pack(c0, c0));
494 			}
495 			else
496 			{
497 				if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
498 				if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
499 				if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
500 				if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); }
501 			}
502 			break;
503 		case FORMAT_R8G8B8:
504 			if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); }
505 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
506 			if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); }
507 			break;
508 		case FORMAT_B8G8R8:
509 			if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); }
510 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
511 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
512 			break;
513 		case FORMAT_A32B32G32R32F:
514 			if(writeRGBA)
515 			{
516 				*Pointer<Float4>(element) = c;
517 			}
518 			else
519 			{
520 				if(writeR) { *Pointer<Float>(element) = c.x; }
521 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
522 				if(writeB) { *Pointer<Float>(element + 8) = c.z; }
523 				if(writeA) { *Pointer<Float>(element + 12) = c.w; }
524 			}
525 			break;
526 		case FORMAT_X32B32G32R32F:
527 			if(writeA) { *Pointer<Float>(element + 12) = 1.0f; }
528 		case FORMAT_B32G32R32F:
529 			if(writeR) { *Pointer<Float>(element) = c.x; }
530 			if(writeG) { *Pointer<Float>(element + 4) = c.y; }
531 			if(writeB) { *Pointer<Float>(element + 8) = c.z; }
532 			break;
533 		case FORMAT_G32R32F:
534 			if(writeR && writeG)
535 			{
536 				*Pointer<Float2>(element) = Float2(c);
537 			}
538 			else
539 			{
540 				if(writeR) { *Pointer<Float>(element) = c.x; }
541 				if(writeG) { *Pointer<Float>(element + 4) = c.y; }
542 			}
543 			break;
544 		case FORMAT_R32F:
545 			if(writeR) { *Pointer<Float>(element) = c.x; }
546 			break;
547 		case FORMAT_A8B8G8R8I:
548 		case FORMAT_A8B8G8R8I_SNORM:
549 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); }
550 		case FORMAT_X8B8G8R8I:
551 		case FORMAT_X8B8G8R8I_SNORM:
552 			if(writeA && (format == FORMAT_X8B8G8R8I || format == FORMAT_X8B8G8R8I_SNORM))
553 			{
554 				*Pointer<SByte>(element + 3) = SByte(0x7F);
555 			}
556 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); }
557 		case FORMAT_G8R8I:
558 		case FORMAT_G8R8I_SNORM:
559 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); }
560 		case FORMAT_R8I:
561 		case FORMAT_R8I_SNORM:
562 			if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); }
563 			break;
564 		case FORMAT_A8B8G8R8UI:
565 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); }
566 		case FORMAT_X8B8G8R8UI:
567 			if(writeA && (format == FORMAT_X8B8G8R8UI))
568 			{
569 				*Pointer<Byte>(element + 3) = Byte(0xFF);
570 			}
571 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); }
572 		case FORMAT_G8R8UI:
573 		case FORMAT_G8R8:
574 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); }
575 		case FORMAT_R8UI:
576 		case FORMAT_R8:
577 			if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); }
578 			break;
579 		case FORMAT_A16B16G16R16I:
580 			if(writeRGBA)
581 			{
582 				*Pointer<Short4>(element) = Short4(RoundInt(c));
583 			}
584 			else
585 			{
586 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
587 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
588 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
589 				if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); }
590 			}
591 			break;
592 		case FORMAT_X16B16G16R16I:
593 			if(writeRGBA)
594 			{
595 				*Pointer<Short4>(element) = Short4(RoundInt(c));
596 			}
597 			else
598 			{
599 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
600 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
601 				if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); }
602 			}
603 			if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); }
604 			break;
605 		case FORMAT_G16R16I:
606 			if(writeR && writeG)
607 			{
608 				*Pointer<Short2>(element) = Short2(Short4(RoundInt(c)));
609 			}
610 			else
611 			{
612 				if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
613 				if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); }
614 			}
615 			break;
616 		case FORMAT_R16I:
617 			if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); }
618 			break;
619 		case FORMAT_A16B16G16R16UI:
620 		case FORMAT_A16B16G16R16:
621 			if(writeRGBA)
622 			{
623 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
624 			}
625 			else
626 			{
627 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
628 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
629 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
630 				if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); }
631 			}
632 			break;
633 		case FORMAT_X16B16G16R16UI:
634 			if(writeRGBA)
635 			{
636 				*Pointer<UShort4>(element) = UShort4(RoundInt(c));
637 			}
638 			else
639 			{
640 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
641 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
642 				if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); }
643 			}
644 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); }
645 			break;
646 		case FORMAT_G16R16UI:
647 		case FORMAT_G16R16:
648 			if(writeR && writeG)
649 			{
650 				*Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c)));
651 			}
652 			else
653 			{
654 				if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
655 				if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); }
656 			}
657 			break;
658 		case FORMAT_R16UI:
659 			if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); }
660 			break;
661 		case FORMAT_A32B32G32R32I:
662 			if(writeRGBA)
663 			{
664 				*Pointer<Int4>(element) = RoundInt(c);
665 			}
666 			else
667 			{
668 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
669 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
670 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
671 				if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); }
672 			}
673 			break;
674 		case FORMAT_X32B32G32R32I:
675 			if(writeRGBA)
676 			{
677 				*Pointer<Int4>(element) = RoundInt(c);
678 			}
679 			else
680 			{
681 				if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
682 				if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
683 				if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); }
684 			}
685 			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
686 			break;
687 		case FORMAT_G32R32I:
688 			if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); }
689 		case FORMAT_R32I:
690 			if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); }
691 			break;
692 		case FORMAT_A32B32G32R32UI:
693 			if(writeRGBA)
694 			{
695 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
696 			}
697 			else
698 			{
699 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
700 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
701 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
702 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); }
703 			}
704 			break;
705 		case FORMAT_X32B32G32R32UI:
706 			if(writeRGBA)
707 			{
708 				*Pointer<UInt4>(element) = UInt4(RoundInt(c));
709 			}
710 			else
711 			{
712 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
713 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
714 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); }
715 			}
716 			if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); }
717 			break;
718 		case FORMAT_G32R32UI:
719 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); }
720 		case FORMAT_R32UI:
721 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); }
722 			break;
723 		case FORMAT_R5G6B5:
724 			if(writeR && writeG && writeB)
725 			{
726 				*Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) |
727 				                                  (RoundInt(Float(c.y)) << Int(5)) |
728 				                                  (RoundInt(Float(c.x)) << Int(11)));
729 			}
730 			else
731 			{
732 				unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000);
733 				unsigned short unmask = ~mask;
734 				*Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) |
735 				                            (UShort(RoundInt(Float(c.z)) |
736 				                                   (RoundInt(Float(c.y)) << Int(5)) |
737 				                                   (RoundInt(Float(c.x)) << Int(11))) & UShort(mask));
738 			}
739 			break;
740 		case FORMAT_A2B10G10R10:
741 			if(writeRGBA)
742 			{
743 				*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) |
744 				                              (RoundInt(Float(c.y)) << 10) |
745 				                              (RoundInt(Float(c.z)) << 20) |
746 				                              (RoundInt(Float(c.w)) << 30));
747 			}
748 			else
749 			{
750 				unsigned int mask = (writeA ? 0xC0000000 : 0x0000) |
751 				                    (writeB ? 0x3FF00000 : 0x0000) |
752 				                    (writeG ? 0x000FFC00 : 0x0000) |
753 				                    (writeR ? 0x000003FF : 0x0000);
754 				unsigned int unmask = ~mask;
755 				*Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) |
756 				                            (UInt(RoundInt(Float(c.x)) |
757 				                                  (RoundInt(Float(c.y)) << 10) |
758 				                                  (RoundInt(Float(c.z)) << 20) |
759 				                                  (RoundInt(Float(c.w)) << 30)) & UInt(mask));
760 			}
761 			break;
762 		case FORMAT_D16:
763 			*Pointer<UShort>(element) = UShort(RoundInt(Float(c.x)));
764 			break;
765 		case FORMAT_D24S8:
766 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
767 			break;
768 		case FORMAT_D32:
769 			*Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)));
770 			break;
771 		case FORMAT_D32F:
772 			*Pointer<Float>(element) = c.x;
773 			break;
774 		case FORMAT_D32F_COMPLEMENTARY:
775 			*Pointer<Float>(element) = 1.0f - c.x;
776 			break;
777 		case FORMAT_D32F_LOCKABLE:
778 			*Pointer<Float>(element) = c.x;
779 			break;
780 		case FORMAT_D32FS8_TEXTURE:
781 			*Pointer<Float>(element) = c.x;
782 			break;
783 		case FORMAT_D32FS8_SHADOW:
784 			*Pointer<Float>(element) = c.x;
785 			break;
786 		case FORMAT_S8:
787 			*Pointer<Byte>(element) = Byte(RoundInt(Float(c.x)));
788 			break;
789 		default:
790 			return false;
791 		}
792 		return true;
793 	}
794 
read(Int4 & c,Pointer<Byte> element,Format format)795 	bool Blitter::read(Int4 &c, Pointer<Byte> element, Format format)
796 	{
797 		c = Int4(0, 0, 0, 1);
798 
799 		switch(format)
800 		{
801 		case FORMAT_A8B8G8R8I:
802 			c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3);
803 		case FORMAT_X8B8G8R8I:
804 			c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2);
805 		case FORMAT_G8R8I:
806 			c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1);
807 		case FORMAT_R8I:
808 			c = Insert(c, Int(*Pointer<SByte>(element)), 0);
809 			break;
810 		case FORMAT_A8B8G8R8UI:
811 			c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3);
812 		case FORMAT_X8B8G8R8UI:
813 			c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2);
814 		case FORMAT_G8R8UI:
815 			c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1);
816 		case FORMAT_R8UI:
817 			c = Insert(c, Int(*Pointer<Byte>(element)), 0);
818 			break;
819 		case FORMAT_A16B16G16R16I:
820 			c = Insert(c, Int(*Pointer<Short>(element + 6)), 3);
821 		case FORMAT_X16B16G16R16I:
822 			c = Insert(c, Int(*Pointer<Short>(element + 4)), 2);
823 		case FORMAT_G16R16I:
824 			c = Insert(c, Int(*Pointer<Short>(element + 2)), 1);
825 		case FORMAT_R16I:
826 			c = Insert(c, Int(*Pointer<Short>(element)), 0);
827 			break;
828 		case FORMAT_A16B16G16R16UI:
829 			c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3);
830 		case FORMAT_X16B16G16R16UI:
831 			c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2);
832 		case FORMAT_G16R16UI:
833 			c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1);
834 		case FORMAT_R16UI:
835 			c = Insert(c, Int(*Pointer<UShort>(element)), 0);
836 			break;
837 		case FORMAT_A32B32G32R32I:
838 		case FORMAT_A32B32G32R32UI:
839 			c = *Pointer<Int4>(element);
840 			break;
841 		case FORMAT_X32B32G32R32I:
842 		case FORMAT_X32B32G32R32UI:
843 			c = Insert(c, *Pointer<Int>(element + 8), 2);
844 		case FORMAT_G32R32I:
845 		case FORMAT_G32R32UI:
846 			c = Insert(c, *Pointer<Int>(element + 4), 1);
847 		case FORMAT_R32I:
848 		case FORMAT_R32UI:
849 			c = Insert(c, *Pointer<Int>(element), 0);
850 			break;
851 		default:
852 			return false;
853 		}
854 
855 		return true;
856 	}
857 
write(Int4 & c,Pointer<Byte> element,Format format,const Blitter::Options & options)858 	bool Blitter::write(Int4 &c, Pointer<Byte> element, Format format, const Blitter::Options& options)
859 	{
860 		bool writeR = (options & WRITE_RED) == WRITE_RED;
861 		bool writeG = (options & WRITE_GREEN) == WRITE_GREEN;
862 		bool writeB = (options & WRITE_BLUE) == WRITE_BLUE;
863 		bool writeA = (options & WRITE_ALPHA) == WRITE_ALPHA;
864 		bool writeRGBA = writeR && writeG && writeB && writeA;
865 
866 		switch(format)
867 		{
868 		case FORMAT_A8B8G8R8I:
869 			if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); }
870 		case FORMAT_X8B8G8R8I:
871 			if(writeA && (format != FORMAT_A8B8G8R8I))
872 			{
873 				*Pointer<SByte>(element + 3) = SByte(0x7F);
874 			}
875 			if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); }
876 		case FORMAT_G8R8I:
877 			if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); }
878 		case FORMAT_R8I:
879 			if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); }
880 			break;
881 		case FORMAT_A8B8G8R8UI:
882 			if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); }
883 		case FORMAT_X8B8G8R8UI:
884 			if(writeA && (format != FORMAT_A8B8G8R8UI))
885 			{
886 				*Pointer<Byte>(element + 3) = Byte(0xFF);
887 			}
888 			if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); }
889 		case FORMAT_G8R8UI:
890 			if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); }
891 		case FORMAT_R8UI:
892 			if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); }
893 			break;
894 		case FORMAT_A16B16G16R16I:
895 			if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); }
896 		case FORMAT_X16B16G16R16I:
897 			if(writeA && (format != FORMAT_A16B16G16R16I))
898 			{
899 				*Pointer<Short>(element + 6) = Short(0x7FFF);
900 			}
901 			if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); }
902 		case FORMAT_G16R16I:
903 			if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); }
904 		case FORMAT_R16I:
905 			if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); }
906 			break;
907 		case FORMAT_A16B16G16R16UI:
908 			if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); }
909 		case FORMAT_X16B16G16R16UI:
910 			if(writeA && (format != FORMAT_A16B16G16R16UI))
911 			{
912 				*Pointer<UShort>(element + 6) = UShort(0xFFFF);
913 			}
914 			if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); }
915 		case FORMAT_G16R16UI:
916 			if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); }
917 		case FORMAT_R16UI:
918 			if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); }
919 			break;
920 		case FORMAT_A32B32G32R32I:
921 			if(writeRGBA)
922 			{
923 				*Pointer<Int4>(element) = c;
924 			}
925 			else
926 			{
927 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
928 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
929 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
930 				if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); }
931 			}
932 			break;
933 		case FORMAT_X32B32G32R32I:
934 			if(writeRGBA)
935 			{
936 				*Pointer<Int4>(element) = c;
937 			}
938 			else
939 			{
940 				if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
941 				if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
942 				if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); }
943 			}
944 			if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); }
945 			break;
946 		case FORMAT_G32R32I:
947 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
948 			if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); }
949 			break;
950 		case FORMAT_R32I:
951 			if(writeR) { *Pointer<Int>(element) = Extract(c, 0); }
952 			break;
953 		case FORMAT_A32B32G32R32UI:
954 			if(writeRGBA)
955 			{
956 				*Pointer<UInt4>(element) = As<UInt4>(c);
957 			}
958 			else
959 			{
960 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
961 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
962 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
963 				if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); }
964 			}
965 			break;
966 		case FORMAT_X32B32G32R32UI:
967 			if(writeRGBA)
968 			{
969 				*Pointer<UInt4>(element) = As<UInt4>(c);
970 			}
971 			else
972 			{
973 				if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
974 				if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
975 				if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); }
976 			}
977 			if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); }
978 			break;
979 		case FORMAT_G32R32UI:
980 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
981 			if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); }
982 			break;
983 		case FORMAT_R32UI:
984 			if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); }
985 			break;
986 		default:
987 			return false;
988 		}
989 
990 		return true;
991 	}
992 
GetScale(float4 & scale,Format format)993 	bool Blitter::GetScale(float4& scale, Format format)
994 	{
995 		switch(format)
996 		{
997 		case FORMAT_L8:
998 		case FORMAT_A8:
999 		case FORMAT_A8R8G8B8:
1000 		case FORMAT_X8R8G8B8:
1001 		case FORMAT_R8:
1002 		case FORMAT_G8R8:
1003 		case FORMAT_R8G8B8:
1004 		case FORMAT_B8G8R8:
1005 		case FORMAT_X8B8G8R8:
1006 		case FORMAT_A8B8G8R8:
1007 		case FORMAT_SRGB8_X8:
1008 		case FORMAT_SRGB8_A8:
1009 			scale = vector(0xFF, 0xFF, 0xFF, 0xFF);
1010 			break;
1011 		case FORMAT_R8I_SNORM:
1012 		case FORMAT_G8R8I_SNORM:
1013 		case FORMAT_X8B8G8R8I_SNORM:
1014 		case FORMAT_A8B8G8R8I_SNORM:
1015 			scale = vector(0x7F, 0x7F, 0x7F, 0x7F);
1016 			break;
1017 		case FORMAT_A16B16G16R16:
1018 			scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
1019 			break;
1020 		case FORMAT_R8I:
1021 		case FORMAT_R8UI:
1022 		case FORMAT_G8R8I:
1023 		case FORMAT_G8R8UI:
1024 		case FORMAT_X8B8G8R8I:
1025 		case FORMAT_X8B8G8R8UI:
1026 		case FORMAT_A8B8G8R8I:
1027 		case FORMAT_A8B8G8R8UI:
1028 		case FORMAT_R16I:
1029 		case FORMAT_R16UI:
1030 		case FORMAT_G16R16:
1031 		case FORMAT_G16R16I:
1032 		case FORMAT_G16R16UI:
1033 		case FORMAT_X16B16G16R16I:
1034 		case FORMAT_X16B16G16R16UI:
1035 		case FORMAT_A16B16G16R16I:
1036 		case FORMAT_A16B16G16R16UI:
1037 		case FORMAT_R32I:
1038 		case FORMAT_R32UI:
1039 		case FORMAT_G32R32I:
1040 		case FORMAT_G32R32UI:
1041 		case FORMAT_X32B32G32R32I:
1042 		case FORMAT_X32B32G32R32UI:
1043 		case FORMAT_A32B32G32R32I:
1044 		case FORMAT_A32B32G32R32UI:
1045 		case FORMAT_A32B32G32R32F:
1046 		case FORMAT_X32B32G32R32F:
1047 		case FORMAT_B32G32R32F:
1048 		case FORMAT_G32R32F:
1049 		case FORMAT_R32F:
1050 			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1051 			break;
1052 		case FORMAT_R5G6B5:
1053 			scale = vector(0x1F, 0x3F, 0x1F, 1.0f);
1054 			break;
1055 		case FORMAT_A2B10G10R10:
1056 			scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03);
1057 			break;
1058 		case FORMAT_D16:
1059 			scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f);
1060 			break;
1061 		case FORMAT_D24S8:
1062 			scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f);
1063 			break;
1064 		case FORMAT_D32:
1065 			scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f);
1066 			break;
1067 		case FORMAT_D32F:
1068 		case FORMAT_D32F_COMPLEMENTARY:
1069 		case FORMAT_D32F_LOCKABLE:
1070 		case FORMAT_D32FS8_TEXTURE:
1071 		case FORMAT_D32FS8_SHADOW:
1072 		case FORMAT_S8:
1073 			scale = vector(1.0f, 1.0f, 1.0f, 1.0f);
1074 			break;
1075 		default:
1076 			return false;
1077 		}
1078 
1079 		return true;
1080 	}
1081 
ApplyScaleAndClamp(Float4 & value,const BlitState & state)1082 	bool Blitter::ApplyScaleAndClamp(Float4& value, const BlitState& state)
1083 	{
1084 		float4 scale, unscale;
1085 		if(Surface::isNonNormalizedInteger(state.sourceFormat) &&
1086 		   !Surface::isNonNormalizedInteger(state.destFormat) &&
1087 		   (state.options & CLEAR_OPERATION))
1088 		{
1089 			// If we're clearing a buffer from an int or uint color into a normalized color,
1090 			// then the whole range of the int or uint color must be scaled between 0 and 1.
1091 			switch(state.sourceFormat)
1092 			{
1093 			case FORMAT_A32B32G32R32I:
1094 				unscale = replicate(static_cast<float>(0x7FFFFFFF));
1095 				break;
1096 			case FORMAT_A32B32G32R32UI:
1097 				unscale = replicate(static_cast<float>(0xFFFFFFFF));
1098 				break;
1099 			default:
1100 				return false;
1101 			}
1102 		}
1103 		else if(!GetScale(unscale, state.sourceFormat))
1104 		{
1105 			return false;
1106 		}
1107 
1108 		if(!GetScale(scale, state.destFormat))
1109 		{
1110 			return false;
1111 		}
1112 
1113 		if(unscale != scale)
1114 		{
1115 			value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w);
1116 		}
1117 
1118 		if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat))
1119 		{
1120 			value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w));
1121 
1122 			value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x,
1123 			                          Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y,
1124 			                          Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z,
1125 			                          Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w));
1126 		}
1127 
1128 		return true;
1129 	}
1130 
ComputeOffset(Int & x,Int & y,Int & pitchB,int bytes,bool quadLayout)1131 	Int Blitter::ComputeOffset(Int& x, Int& y, Int& pitchB, int bytes, bool quadLayout)
1132 	{
1133 		return (quadLayout ? (y & Int(~1)) : RValue<Int>(y)) * pitchB +
1134 		       (quadLayout ? ((y & Int(1)) << 1) + (x * 2) - (x & Int(1)) : RValue<Int>(x)) * bytes;
1135 	}
1136 
generate(BlitState & state)1137 	Routine *Blitter::generate(BlitState &state)
1138 	{
1139 		Function<Void(Pointer<Byte>)> function;
1140 		{
1141 			Pointer<Byte> blit(function.Arg<0>());
1142 
1143 			Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source));
1144 			Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest));
1145 			Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB));
1146 			Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB));
1147 
1148 			Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0));
1149 			Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0));
1150 			Float w = *Pointer<Float>(blit + OFFSET(BlitData,w));
1151 			Float h = *Pointer<Float>(blit + OFFSET(BlitData,h));
1152 
1153 			Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d));
1154 			Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d));
1155 			Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d));
1156 			Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d));
1157 
1158 			Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth));
1159 			Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight));
1160 
1161 			bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat);
1162 			bool intDst = Surface::isNonNormalizedInteger(state.destFormat);
1163 			bool intBoth = intSrc && intDst;
1164 			bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat);
1165 			bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat);
1166 			int srcBytes = Surface::bytes(state.sourceFormat);
1167 			int dstBytes = Surface::bytes(state.destFormat);
1168 
1169 			bool hasConstantColorI = false;
1170 			Int4 constantColorI;
1171 			bool hasConstantColorF = false;
1172 			Float4 constantColorF;
1173 			if(state.options & CLEAR_OPERATION)
1174 			{
1175 				if(intBoth) // Integer types
1176 				{
1177 					if(!read(constantColorI, source, state.sourceFormat))
1178 					{
1179 						return nullptr;
1180 					}
1181 					hasConstantColorI = true;
1182 				}
1183 				else
1184 				{
1185 					if(!read(constantColorF, source, state.sourceFormat))
1186 					{
1187 						return nullptr;
1188 					}
1189 					hasConstantColorF = true;
1190 
1191 					if(!ApplyScaleAndClamp(constantColorF, state))
1192 					{
1193 						return nullptr;
1194 					}
1195 				}
1196 			}
1197 
1198 			Float y = y0;
1199 
1200 			For(Int j = y0d, j < y1d, j++)
1201 			{
1202 				Float x = x0;
1203 				Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB;
1204 
1205 				For(Int i = x0d, i < x1d, i++)
1206 				{
1207 					Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes;
1208 					if(hasConstantColorI)
1209 					{
1210 						if(!write(constantColorI, d, state.destFormat, state.options))
1211 						{
1212 							return nullptr;
1213 						}
1214 					}
1215 					else if(hasConstantColorF)
1216 					{
1217 						if(!write(constantColorF, d, state.destFormat, state.options))
1218 						{
1219 							return nullptr;
1220 						}
1221 					}
1222 					else if(intBoth) // Integer types do not support filtering
1223 					{
1224 						Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision
1225 						Int X = Int(x);
1226 						Int Y = Int(y);
1227 
1228 						Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1229 
1230 						if(!read(color, s, state.sourceFormat))
1231 						{
1232 							return nullptr;
1233 						}
1234 
1235 						if(!write(color, d, state.destFormat, state.options))
1236 						{
1237 							return nullptr;
1238 						}
1239 					}
1240 					else
1241 					{
1242 						Float4 color;
1243 
1244 						if(!(state.options & FILTER_LINEAR) || intSrc)
1245 						{
1246 							Int X = Int(x);
1247 							Int Y = Int(y);
1248 
1249 							Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout);
1250 
1251 							if(!read(color, s, state.sourceFormat))
1252 							{
1253 								return nullptr;
1254 							}
1255 						}
1256 						else   // Bilinear filtering
1257 						{
1258 							Float x0 = x - 0.5f;
1259 							Float y0 = y - 0.5f;
1260 
1261 							Int X0 = Max(Int(x0), 0);
1262 							Int Y0 = Max(Int(y0), 0);
1263 
1264 							Int X1 = IfThenElse(X0 + 1 >= sWidth, X0, X0 + 1);
1265 							Int Y1 = IfThenElse(Y0 + 1 >= sHeight, Y0, Y0 + 1);
1266 
1267 							Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout);
1268 							Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout);
1269 							Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout);
1270 							Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout);
1271 
1272 							Float4 c00; if(!read(c00, s00, state.sourceFormat)) return nullptr;
1273 							Float4 c01; if(!read(c01, s01, state.sourceFormat)) return nullptr;
1274 							Float4 c10; if(!read(c10, s10, state.sourceFormat)) return nullptr;
1275 							Float4 c11; if(!read(c11, s11, state.sourceFormat)) return nullptr;
1276 
1277 							Float4 fx = Float4(x0 - Float(X0));
1278 							Float4 fy = Float4(y0 - Float(Y0));
1279 
1280 							color = c00 * (Float4(1.0f) - fx) * (Float4(1.0f) - fy) +
1281 							        c01 * fx * (Float4(1.0f) - fy) +
1282 							        c10 * (Float4(1.0f) - fx) * fy +
1283 							        c11 * fx * fy;
1284 						}
1285 
1286 						if(!ApplyScaleAndClamp(color, state) || !write(color, d, state.destFormat, state.options))
1287 						{
1288 							return nullptr;
1289 						}
1290 					}
1291 
1292 					if(!hasConstantColorI && !hasConstantColorF) { x += w; }
1293 				}
1294 
1295 				if(!hasConstantColorI && !hasConstantColorF) { y += h; }
1296 			}
1297 		}
1298 
1299 		return function(L"BlitRoutine");
1300 	}
1301 
blitReactor(Surface * source,const SliceRect & sourceRect,Surface * dest,const SliceRect & destRect,const Blitter::Options & options)1302 	bool Blitter::blitReactor(Surface *source, const SliceRect &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options)
1303 	{
1304 		ASSERT(!(options & CLEAR_OPERATION) || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1)));
1305 
1306 		Rect dRect = destRect;
1307 		Rect sRect = sourceRect;
1308 		if(destRect.x0 > destRect.x1)
1309 		{
1310 			swap(dRect.x0, dRect.x1);
1311 			swap(sRect.x0, sRect.x1);
1312 		}
1313 		if(destRect.y0 > destRect.y1)
1314 		{
1315 			swap(dRect.y0, dRect.y1);
1316 			swap(sRect.y0, sRect.y1);
1317 		}
1318 
1319 		BlitState state;
1320 
1321 		bool useSourceInternal = !source->isExternalDirty();
1322 		bool useDestInternal = !dest->isExternalDirty();
1323 		bool isStencil = ((options & USE_STENCIL) == USE_STENCIL);
1324 
1325 		state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal);
1326 		state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal);
1327 		state.options = options;
1328 
1329 		criticalSection.lock();
1330 		Routine *blitRoutine = blitCache->query(state);
1331 
1332 		if(!blitRoutine)
1333 		{
1334 			blitRoutine = generate(state);
1335 
1336 			if(!blitRoutine)
1337 			{
1338 				criticalSection.unlock();
1339 				return false;
1340 			}
1341 
1342 			blitCache->add(state, blitRoutine);
1343 		}
1344 
1345 		criticalSection.unlock();
1346 
1347 		void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry();
1348 
1349 		BlitData data;
1350 
1351 		bool isRGBA = ((options & WRITE_RGBA) == WRITE_RGBA);
1352 		bool isEntireDest = dest->isEntire(destRect);
1353 
1354 		data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) :
1355 		                          source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal);
1356 		data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) :
1357 		                        dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal);
1358 		data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal);
1359 		data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal);
1360 
1361 		data.w = 1.0f / (dRect.x1 - dRect.x0) * (sRect.x1 - sRect.x0);
1362 		data.h = 1.0f / (dRect.y1 - dRect.y0) * (sRect.y1 - sRect.y0);
1363 		data.x0 = (float)sRect.x0 + 0.5f * data.w;
1364 		data.y0 = (float)sRect.y0 + 0.5f * data.h;
1365 
1366 		data.x0d = dRect.x0;
1367 		data.x1d = dRect.x1;
1368 		data.y0d = dRect.y0;
1369 		data.y1d = dRect.y1;
1370 
1371 		data.sWidth = source->getWidth();
1372 		data.sHeight = source->getHeight();
1373 
1374 		blitFunction(&data);
1375 
1376 		if(isStencil)
1377 		{
1378 			source->unlockStencil();
1379 			dest->unlockStencil();
1380 		}
1381 		else
1382 		{
1383 			source->unlock(useSourceInternal);
1384 			dest->unlock(useDestInternal);
1385 		}
1386 
1387 		return true;
1388 	}
1389 }
1390