• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //    http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "Renderer.hpp"
16 
17 #include "Clipper.hpp"
18 #include "Math.hpp"
19 #include "FrameBuffer.hpp"
20 #include "Timer.hpp"
21 #include "Surface.hpp"
22 #include "Half.hpp"
23 #include "Primitive.hpp"
24 #include "Polygon.hpp"
25 #include "SwiftConfig.hpp"
26 #include "MutexLock.hpp"
27 #include "CPUID.hpp"
28 #include "Memory.hpp"
29 #include "Resource.hpp"
30 #include "Constants.hpp"
31 #include "Debug.hpp"
32 #include "Reactor/Reactor.hpp"
33 
34 #undef max
35 
// Forwarded to the SwiftConfig constructor when a Renderer is created.
// NOTE(review): presumably turns off SwiftConfig's configuration server - confirm in SwiftConfig.
bool disableServer = true;

#ifndef NDEBUG
// Debug-only bounds on the primitive count accepted by Renderer::draw();
// a draw whose count falls outside [minPrimitives, maxPrimitives] is skipped.
unsigned int minPrimitives = 1;
unsigned int maxPrimitives = 1u << 21;
#endif
42 
43 namespace sw
44 {
45 	extern bool halfIntegerCoordinates;     // Pixel centers are not at integer coordinates
46 	extern bool symmetricNormalizedDepth;   // [-1, 1] instead of [0, 1]
47 	extern bool booleanFaceRegister;
48 	extern bool fullPixelPositionRegister;
49 	extern bool leadingVertexFirst;         // Flat shading uses first vertex, else last
50 	extern bool secondaryColor;             // Specular lighting is applied after texturing
51 	extern bool colorsDefaultToZero;
52 
53 	extern bool forceWindowed;
54 	extern bool complementaryDepthBuffer;
55 	extern bool postBlendSRGB;
56 	extern bool exactColorRounding;
57 	extern TransparencyAntialiasing transparencyAntialiasing;
58 	extern bool forceClearRegisters;
59 
60 	extern bool precacheVertex;
61 	extern bool precacheSetup;
62 	extern bool precachePixel;
63 
64 	int batchSize = 128;
65 	int threadCount = 1;
66 	int unitCount = 1;
67 	int clusterCount = 1;
68 
69 	TranscendentalPrecision logPrecision = ACCURATE;
70 	TranscendentalPrecision expPrecision = ACCURATE;
71 	TranscendentalPrecision rcpPrecision = ACCURATE;
72 	TranscendentalPrecision rsqPrecision = ACCURATE;
73 	bool perspectiveCorrection = true;
74 
75 	struct Parameters
76 	{
77 		Renderer *renderer;
78 		int threadIndex;
79 	};
80 
DrawCall()81 	DrawCall::DrawCall()
82 	{
83 		queries = 0;
84 
85 		vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
86 		vsDirtyConstI = 16;
87 		vsDirtyConstB = 16;
88 
89 		psDirtyConstF = FRAGMENT_UNIFORM_VECTORS;
90 		psDirtyConstI = 16;
91 		psDirtyConstB = 16;
92 
93 		references = -1;
94 
95 		data = (DrawData*)allocate(sizeof(DrawData));
96 		data->constants = &constants;
97 	}
98 
~DrawCall()99 	DrawCall::~DrawCall()
100 	{
101 		delete queries;
102 
103 		deallocate(data);
104 	}
105 
Renderer(Context * context,Conventions conventions,bool exactColorRounding)106 	Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport()
107 	{
108 		sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates;
109 		sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth;
110 		sw::booleanFaceRegister = conventions.booleanFaceRegister;
111 		sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister;
112 		sw::leadingVertexFirst = conventions.leadingVertexFirst;
113 		sw::secondaryColor = conventions.secondaryColor;
114 		sw::colorsDefaultToZero = conventions.colorsDefaultToZero;
115 		sw::exactColorRounding = exactColorRounding;
116 
117 		setRenderTarget(0, 0);
118 		clipper = new Clipper(symmetricNormalizedDepth);
119 		blitter = new Blitter;
120 
121 		updateViewMatrix = true;
122 		updateBaseMatrix = true;
123 		updateProjectionMatrix = true;
124 		updateClipPlanes = true;
125 
126 		#if PERF_HUD
127 			resetTimers();
128 		#endif
129 
130 		for(int i = 0; i < 16; i++)
131 		{
132 			vertexTask[i] = 0;
133 
134 			worker[i] = 0;
135 			resume[i] = 0;
136 			suspend[i] = 0;
137 		}
138 
139 		threadsAwake = 0;
140 		resumeApp = new Event();
141 
142 		currentDraw = 0;
143 		nextDraw = 0;
144 
145 		qHead = 0;
146 		qSize = 0;
147 
148 		for(int i = 0; i < 16; i++)
149 		{
150 			triangleBatch[i] = 0;
151 			primitiveBatch[i] = 0;
152 		}
153 
154 		for(int draw = 0; draw < DRAW_COUNT; draw++)
155 		{
156 			drawCall[draw] = new DrawCall();
157 			drawList[draw] = drawCall[draw];
158 		}
159 
160 		for(int unit = 0; unit < 16; unit++)
161 		{
162 			primitiveProgress[unit].init();
163 		}
164 
165 		for(int cluster = 0; cluster < 16; cluster++)
166 		{
167 			pixelProgress[cluster].init();
168 		}
169 
170 		clipFlags = 0;
171 
172 		swiftConfig = new SwiftConfig(disableServer);
173 		updateConfiguration(true);
174 
175 		sync = new Resource(0);
176 	}
177 
~Renderer()178 	Renderer::~Renderer()
179 	{
180 		sync->destruct();
181 
182 		delete clipper;
183 		clipper = nullptr;
184 
185 		delete blitter;
186 		blitter = nullptr;
187 
188 		terminateThreads();
189 		delete resumeApp;
190 
191 		for(int draw = 0; draw < DRAW_COUNT; draw++)
192 		{
193 			delete drawCall[draw];
194 		}
195 
196 		delete swiftConfig;
197 	}
198 
199 	// This object has to be mem aligned
operator new(size_t size)200 	void* Renderer::operator new(size_t size)
201 	{
202 		ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class
203 		return sw::allocate(sizeof(Renderer), 16);
204 	}
205 
operator delete(void * mem)206 	void Renderer::operator delete(void * mem)
207 	{
208 		sw::deallocate(mem);
209 	}
210 
draw(DrawType drawType,unsigned int indexOffset,unsigned int count,bool update)211 	void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update)
212 	{
213 		#ifndef NDEBUG
214 			if(count < minPrimitives || count > maxPrimitives)
215 			{
216 				return;
217 			}
218 		#endif
219 
220 		context->drawType = drawType;
221 
222 		updateConfiguration();
223 		updateClipper();
224 
225 		int ss = context->getSuperSampleCount();
226 		int ms = context->getMultiSampleCount();
227 
228 		for(int q = 0; q < ss; q++)
229 		{
230 			unsigned int oldMultiSampleMask = context->multiSampleMask;
231 			context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms));
232 
233 			if(!context->multiSampleMask)
234 			{
235 				continue;
236 			}
237 
238 			sync->lock(sw::PRIVATE);
239 
240 			if(update || oldMultiSampleMask != context->multiSampleMask)
241 			{
242 				vertexState = VertexProcessor::update(drawType);
243 				setupState = SetupProcessor::update();
244 				pixelState = PixelProcessor::update();
245 
246 				vertexRoutine = VertexProcessor::routine(vertexState);
247 				setupRoutine = SetupProcessor::routine(setupState);
248 				pixelRoutine = PixelProcessor::routine(pixelState);
249 			}
250 
251 			int batch = batchSize / ms;
252 
253 			int (Renderer::*setupPrimitives)(int batch, int count);
254 
255 			if(context->isDrawTriangle())
256 			{
257 				switch(context->fillMode)
258 				{
259 				case FILL_SOLID:
260 					setupPrimitives = &Renderer::setupSolidTriangles;
261 					break;
262 				case FILL_WIREFRAME:
263 					setupPrimitives = &Renderer::setupWireframeTriangle;
264 					batch = 1;
265 					break;
266 				case FILL_VERTEX:
267 					setupPrimitives = &Renderer::setupVertexTriangle;
268 					batch = 1;
269 					break;
270 				default:
271 					ASSERT(false);
272 					return;
273 				}
274 			}
275 			else if(context->isDrawLine())
276 			{
277 				setupPrimitives = &Renderer::setupLines;
278 			}
279 			else   // Point draw
280 			{
281 				setupPrimitives = &Renderer::setupPoints;
282 			}
283 
284 			DrawCall *draw = 0;
285 
286 			do
287 			{
288 				for(int i = 0; i < DRAW_COUNT; i++)
289 				{
290 					if(drawCall[i]->references == -1)
291 					{
292 						draw = drawCall[i];
293 						drawList[nextDraw % DRAW_COUNT] = draw;
294 
295 						break;
296 					}
297 				}
298 
299 				if(!draw)
300 				{
301 					resumeApp->wait();
302 				}
303 			}
304 			while(!draw);
305 
306 			DrawData *data = draw->data;
307 
308 			if(queries.size() != 0)
309 			{
310 				draw->queries = new std::list<Query*>();
311 				bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled;
312 				for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++)
313 				{
314 					Query* q = *query;
315 					if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN))
316 					{
317 						atomicIncrement(&(q->reference));
318 						draw->queries->push_back(q);
319 					}
320 				}
321 			}
322 
323 			draw->drawType = drawType;
324 			draw->batchSize = batch;
325 
326 			vertexRoutine->bind();
327 			setupRoutine->bind();
328 			pixelRoutine->bind();
329 
330 			draw->vertexRoutine = vertexRoutine;
331 			draw->setupRoutine = setupRoutine;
332 			draw->pixelRoutine = pixelRoutine;
333 			draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry();
334 			draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry();
335 			draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry();
336 			draw->setupPrimitives = setupPrimitives;
337 			draw->setupState = setupState;
338 
339 			for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
340 			{
341 				draw->vertexStream[i] = context->input[i].resource;
342 				data->input[i] = context->input[i].buffer;
343 				data->stride[i] = context->input[i].stride;
344 
345 				if(draw->vertexStream[i])
346 				{
347 					draw->vertexStream[i]->lock(PUBLIC, PRIVATE);
348 				}
349 			}
350 
351 			if(context->indexBuffer)
352 			{
353 				data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset;
354 			}
355 
356 			draw->indexBuffer = context->indexBuffer;
357 
358 			for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++)
359 			{
360 				draw->texture[sampler] = 0;
361 			}
362 
363 			for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++)
364 			{
365 				if(pixelState.sampler[sampler].textureType != TEXTURE_NULL)
366 				{
367 					draw->texture[sampler] = context->texture[sampler];
368 					draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE);   // If the texure is both read and written, use the same read/write lock as render targets
369 
370 					data->mipmap[sampler] = context->sampler[sampler].getTextureData();
371 				}
372 			}
373 
374 			if(context->pixelShader)
375 			{
376 				if(draw->psDirtyConstF)
377 				{
378 					memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8));
379 					memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF);
380 					draw->psDirtyConstF = 0;
381 				}
382 
383 				if(draw->psDirtyConstI)
384 				{
385 					memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI);
386 					draw->psDirtyConstI = 0;
387 				}
388 
389 				if(draw->psDirtyConstB)
390 				{
391 					memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB);
392 					draw->psDirtyConstB = 0;
393 				}
394 
395 				PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers);
396 			}
397 			else
398 			{
399 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
400 				{
401 					draw->pUniformBuffers[i] = nullptr;
402 				}
403 			}
404 
405 			if(context->pixelShaderVersion() <= 0x0104)
406 			{
407 				for(int stage = 0; stage < 8; stage++)
408 				{
409 					if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader)
410 					{
411 						data->textureStage[stage] = context->textureStage[stage].uniforms;
412 					}
413 					else break;
414 				}
415 			}
416 
417 			if(context->vertexShader)
418 			{
419 				if(context->vertexShader->getVersion() >= 0x0300)
420 				{
421 					for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++)
422 					{
423 						if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL)
424 						{
425 							draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler];
426 							draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE);
427 
428 							data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData();
429 						}
430 					}
431 				}
432 
433 				if(draw->vsDirtyConstF)
434 				{
435 					memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF);
436 					draw->vsDirtyConstF = 0;
437 				}
438 
439 				if(draw->vsDirtyConstI)
440 				{
441 					memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI);
442 					draw->vsDirtyConstI = 0;
443 				}
444 
445 				if(draw->vsDirtyConstB)
446 				{
447 					memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB);
448 					draw->vsDirtyConstB = 0;
449 				}
450 
451 				if(context->vertexShader->isInstanceIdDeclared())
452 				{
453 					data->instanceID = context->instanceID;
454 				}
455 
456 				VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers);
457 				VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers);
458 			}
459 			else
460 			{
461 				data->ff = ff;
462 
463 				draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1;
464 				draw->vsDirtyConstI = 16;
465 				draw->vsDirtyConstB = 16;
466 
467 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
468 				{
469 					draw->vUniformBuffers[i] = nullptr;
470 				}
471 
472 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
473 				{
474 					draw->transformFeedbackBuffers[i] = nullptr;
475 				}
476 			}
477 
478 			if(pixelState.stencilActive)
479 			{
480 				data->stencil[0] = stencil;
481 				data->stencil[1] = stencilCCW;
482 			}
483 
484 			if(pixelState.fogActive)
485 			{
486 				data->fog = fog;
487 			}
488 
489 			if(setupState.isDrawPoint)
490 			{
491 				data->point = point;
492 			}
493 
494 			data->lineWidth = context->lineWidth;
495 
496 			data->factor = factor;
497 
498 			if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
499 			{
500 				float ref = context->alphaReference * (1.0f / 255.0f);
501 				float margin = sw::min(ref, 1.0f - ref);
502 
503 				if(ms == 4)
504 				{
505 					data->a2c0 = replicate(ref - margin * 0.6f);
506 					data->a2c1 = replicate(ref - margin * 0.2f);
507 					data->a2c2 = replicate(ref + margin * 0.2f);
508 					data->a2c3 = replicate(ref + margin * 0.6f);
509 				}
510 				else if(ms == 2)
511 				{
512 					data->a2c0 = replicate(ref - margin * 0.3f);
513 					data->a2c1 = replicate(ref + margin * 0.3f);
514 				}
515 				else ASSERT(false);
516 			}
517 
518 			if(pixelState.occlusionEnabled)
519 			{
520 				for(int cluster = 0; cluster < clusterCount; cluster++)
521 				{
522 					data->occlusion[cluster] = 0;
523 				}
524 			}
525 
526 			#if PERF_PROFILE
527 				for(int cluster = 0; cluster < clusterCount; cluster++)
528 				{
529 					for(int i = 0; i < PERF_TIMERS; i++)
530 					{
531 						data->cycles[i][cluster] = 0;
532 					}
533 				}
534 			#endif
535 
536 			// Viewport
537 			{
538 				float W = 0.5f * viewport.width;
539 				float H = 0.5f * viewport.height;
540 				float X0 = viewport.x0 + W;
541 				float Y0 = viewport.y0 + H;
542 				float N = viewport.minZ;
543 				float F = viewport.maxZ;
544 				float Z = F - N;
545 
546 				if(context->isDrawTriangle(false))
547 				{
548 					N += depthBias;
549 				}
550 
551 				if(complementaryDepthBuffer)
552 				{
553 					Z = -Z;
554 					N = 1 - N;
555 				}
556 
557 				static const float X[5][16] =   // Fragment offsets
558 				{
559 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
560 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
561 					{-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
562 					{+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
563 					{+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f}    // 16 samples
564 				};
565 
566 				static const float Y[5][16] =   // Fragment offsets
567 				{
568 					{+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 1 sample
569 					{-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 2 samples
570 					{-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 4 samples
571 					{-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f},   // 8 samples
572 					{-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f}    // 16 samples
573 				};
574 
575 				int s = sw::log2(ss);
576 
577 				data->Wx16 = replicate(W * 16);
578 				data->Hx16 = replicate(H * 16);
579 				data->X0x16 = replicate(X0 * 16 - 8);
580 				data->Y0x16 = replicate(Y0 * 16 - 8);
581 				data->XXXX = replicate(X[s][q] / W);
582 				data->YYYY = replicate(Y[s][q] / H);
583 				data->halfPixelX = replicate(0.5f / W);
584 				data->halfPixelY = replicate(0.5f / H);
585 				data->viewportHeight = abs(viewport.height);
586 				data->slopeDepthBias = slopeDepthBias;
587 				data->depthRange = Z;
588 				data->depthNear = N;
589 				draw->clipFlags = clipFlags;
590 
591 				if(clipFlags)
592 				{
593 					if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0];
594 					if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1];
595 					if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2];
596 					if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3];
597 					if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4];
598 					if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5];
599 				}
600 			}
601 
602 			// Target
603 			{
604 				for(int index = 0; index < RENDERTARGETS; index++)
605 				{
606 					draw->renderTarget[index] = context->renderTarget[index];
607 
608 					if(draw->renderTarget[index])
609 					{
610 						data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
611 						data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB();
612 						data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB();
613 					}
614 				}
615 
616 				draw->depthBuffer = context->depthBuffer;
617 				draw->stencilBuffer = context->stencilBuffer;
618 
619 				if(draw->depthBuffer)
620 				{
621 					data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED);
622 					data->depthPitchB = context->depthBuffer->getInternalPitchB();
623 					data->depthSliceB = context->depthBuffer->getInternalSliceB();
624 				}
625 
626 				if(draw->stencilBuffer)
627 				{
628 					data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED);
629 					data->stencilPitchB = context->stencilBuffer->getStencilPitchB();
630 					data->stencilSliceB = context->stencilBuffer->getStencilSliceB();
631 				}
632 			}
633 
634 			// Scissor
635 			{
636 				data->scissorX0 = scissor.x0;
637 				data->scissorX1 = scissor.x1;
638 				data->scissorY0 = scissor.y0;
639 				data->scissorY1 = scissor.y1;
640 			}
641 
642 			draw->primitive = 0;
643 			draw->count = count;
644 
645 			draw->references = (count + batch - 1) / batch;
646 
647 			schedulerMutex.lock();
648 			nextDraw++;
649 			schedulerMutex.unlock();
650 
651 			#ifndef NDEBUG
652 			if(threadCount == 1)   // Use main thread for draw execution
653 			{
654 				threadsAwake = 1;
655 				task[0].type = Task::RESUME;
656 
657 				taskLoop(0);
658 			}
659 			else
660 			#endif
661 			{
662 				if(!threadsAwake)
663 				{
664 					suspend[0]->wait();
665 
666 					threadsAwake = 1;
667 					task[0].type = Task::RESUME;
668 
669 					resume[0]->signal();
670 				}
671 			}
672 		}
673 	}
674 
clear(void * value,Format format,Surface * dest,const Rect & clearRect,unsigned int rgbaMask)675 	void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask)
676 	{
677 		SliceRect rect = clearRect;
678 		int samples = dest->getDepth();
679 
680 		for(rect.slice = 0; rect.slice < samples; rect.slice++)
681 		{
682 			blitter->clear(value, format, dest, rect, rgbaMask);
683 		}
684 	}
685 
blit(Surface * source,const SliceRect & sRect,Surface * dest,const SliceRect & dRect,bool filter,bool isStencil)686 	void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil)
687 	{
688 		blitter->blit(source, sRect, dest, dRect, filter, isStencil);
689 	}
690 
blit3D(Surface * source,Surface * dest)691 	void Renderer::blit3D(Surface *source, Surface *dest)
692 	{
693 		blitter->blit3D(source, dest);
694 	}
695 
threadFunction(void * parameters)696 	void Renderer::threadFunction(void *parameters)
697 	{
698 		Renderer *renderer = static_cast<Parameters*>(parameters)->renderer;
699 		int threadIndex = static_cast<Parameters*>(parameters)->threadIndex;
700 
701 		if(logPrecision < IEEE)
702 		{
703 			CPUID::setFlushToZero(true);
704 			CPUID::setDenormalsAreZero(true);
705 		}
706 
707 		renderer->threadLoop(threadIndex);
708 	}
709 
threadLoop(int threadIndex)710 	void Renderer::threadLoop(int threadIndex)
711 	{
712 		while(!exitThreads)
713 		{
714 			taskLoop(threadIndex);
715 
716 			suspend[threadIndex]->signal();
717 			resume[threadIndex]->wait();
718 		}
719 	}
720 
taskLoop(int threadIndex)721 	void Renderer::taskLoop(int threadIndex)
722 	{
723 		while(task[threadIndex].type != Task::SUSPEND)
724 		{
725 			scheduleTask(threadIndex);
726 			executeTask(threadIndex);
727 		}
728 	}
729 
findAvailableTasks()730 	void Renderer::findAvailableTasks()
731 	{
732 		// Find pixel tasks
733 		for(int cluster = 0; cluster < clusterCount; cluster++)
734 		{
735 			if(!pixelProgress[cluster].executing)
736 			{
737 				for(int unit = 0; unit < unitCount; unit++)
738 				{
739 					if(primitiveProgress[unit].references > 0)   // Contains processed primitives
740 					{
741 						if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall)
742 						{
743 							if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive)   // Previous primitives have been rendered
744 							{
745 								Task &task = taskQueue[qHead];
746 								task.type = Task::PIXELS;
747 								task.primitiveUnit = unit;
748 								task.pixelCluster = cluster;
749 
750 								pixelProgress[cluster].executing = true;
751 
752 								// Commit to the task queue
753 								qHead = (qHead + 1) % 32;
754 								qSize++;
755 
756 								break;
757 							}
758 						}
759 					}
760 				}
761 			}
762 		}
763 
764 		// Find primitive tasks
765 		if(currentDraw == nextDraw)
766 		{
767 			return;   // No more primitives to process
768 		}
769 
770 		for(int unit = 0; unit < unitCount; unit++)
771 		{
772 			DrawCall *draw = drawList[currentDraw % DRAW_COUNT];
773 
774 			if(draw->primitive >= draw->count)
775 			{
776 				currentDraw++;
777 
778 				if(currentDraw == nextDraw)
779 				{
780 					return;   // No more primitives to process
781 				}
782 
783 				draw = drawList[currentDraw % DRAW_COUNT];
784 			}
785 
786 			if(!primitiveProgress[unit].references)   // Task not already being executed and not still in use by a pixel unit
787 			{
788 				int primitive = draw->primitive;
789 				int count = draw->count;
790 				int batch = draw->batchSize;
791 
792 				primitiveProgress[unit].drawCall = currentDraw;
793 				primitiveProgress[unit].firstPrimitive = primitive;
794 				primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive;
795 
796 				draw->primitive += batch;
797 
798 				Task &task = taskQueue[qHead];
799 				task.type = Task::PRIMITIVES;
800 				task.primitiveUnit = unit;
801 
802 				primitiveProgress[unit].references = -1;
803 
804 				// Commit to the task queue
805 				qHead = (qHead + 1) % 32;
806 				qSize++;
807 			}
808 		}
809 	}
810 
scheduleTask(int threadIndex)811 	void Renderer::scheduleTask(int threadIndex)
812 	{
813 		schedulerMutex.lock();
814 
815 		if((int)qSize < threadCount - threadsAwake + 1)
816 		{
817 			findAvailableTasks();
818 		}
819 
820 		if(qSize != 0)
821 		{
822 			task[threadIndex] = taskQueue[(qHead - qSize) % 32];
823 			qSize--;
824 
825 			if(threadsAwake != threadCount)
826 			{
827 				int wakeup = qSize - threadsAwake + 1;
828 
829 				for(int i = 0; i < threadCount && wakeup > 0; i++)
830 				{
831 					if(task[i].type == Task::SUSPEND)
832 					{
833 						suspend[i]->wait();
834 						task[i].type = Task::RESUME;
835 						resume[i]->signal();
836 
837 						threadsAwake++;
838 						wakeup--;
839 					}
840 				}
841 			}
842 		}
843 		else
844 		{
845 			task[threadIndex].type = Task::SUSPEND;
846 
847 			threadsAwake--;
848 		}
849 
850 		schedulerMutex.unlock();
851 	}
852 
executeTask(int threadIndex)853 	void Renderer::executeTask(int threadIndex)
854 	{
855 		#if PERF_HUD
856 			int64_t startTick = Timer::ticks();
857 		#endif
858 
859 		switch(task[threadIndex].type)
860 		{
861 		case Task::PRIMITIVES:
862 			{
863 				int unit = task[threadIndex].primitiveUnit;
864 
865 				int input = primitiveProgress[unit].firstPrimitive;
866 				int count = primitiveProgress[unit].primitiveCount;
867 				DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
868 				int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives;
869 
870 				processPrimitiveVertices(unit, input, count, draw->count, threadIndex);
871 
872 				#if PERF_HUD
873 					int64_t time = Timer::ticks();
874 					vertexTime[threadIndex] += time - startTick;
875 					startTick = time;
876 				#endif
877 
878 				int visible = 0;
879 
880 				if(!draw->setupState.rasterizerDiscard)
881 				{
882 					visible = (this->*setupPrimitives)(unit, count);
883 				}
884 
885 				primitiveProgress[unit].visible = visible;
886 				primitiveProgress[unit].references = clusterCount;
887 
888 				#if PERF_HUD
889 					setupTime[threadIndex] += Timer::ticks() - startTick;
890 				#endif
891 			}
892 			break;
893 		case Task::PIXELS:
894 			{
895 				int unit = task[threadIndex].primitiveUnit;
896 				int visible = primitiveProgress[unit].visible;
897 
898 				if(visible > 0)
899 				{
900 					int cluster = task[threadIndex].pixelCluster;
901 					Primitive *primitive = primitiveBatch[unit];
902 					DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT];
903 					DrawData *data = draw->data;
904 					PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer;
905 
906 					pixelRoutine(primitive, visible, cluster, data);
907 				}
908 
909 				finishRendering(task[threadIndex]);
910 
911 				#if PERF_HUD
912 					pixelTime[threadIndex] += Timer::ticks() - startTick;
913 				#endif
914 			}
915 			break;
916 		case Task::RESUME:
917 			break;
918 		case Task::SUSPEND:
919 			break;
920 		default:
921 			ASSERT(false);
922 		}
923 	}
924 
synchronize()925 	void Renderer::synchronize()
926 	{
927 		sync->lock(sw::PUBLIC);
928 		sync->unlock();
929 	}
930 
finishRendering(Task & pixelTask)931 	void Renderer::finishRendering(Task &pixelTask)
932 	{
933 		int unit = pixelTask.primitiveUnit;
934 		int cluster = pixelTask.pixelCluster;
935 
936 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
937 		DrawData &data = *draw.data;
938 		int primitive = primitiveProgress[unit].firstPrimitive;
939 		int count = primitiveProgress[unit].primitiveCount;
940 		int processedPrimitives = primitive + count;
941 
942 		pixelProgress[cluster].processedPrimitives = processedPrimitives;
943 
944 		if(pixelProgress[cluster].processedPrimitives >= draw.count)
945 		{
946 			pixelProgress[cluster].drawCall++;
947 			pixelProgress[cluster].processedPrimitives = 0;
948 		}
949 
950 		int ref = atomicDecrement(&primitiveProgress[unit].references);
951 
952 		if(ref == 0)
953 		{
954 			ref = atomicDecrement(&draw.references);
955 
956 			if(ref == 0)
957 			{
958 				#if PERF_PROFILE
959 					for(int cluster = 0; cluster < clusterCount; cluster++)
960 					{
961 						for(int i = 0; i < PERF_TIMERS; i++)
962 						{
963 							profiler.cycles[i] += data.cycles[i][cluster];
964 						}
965 					}
966 				#endif
967 
968 				if(draw.queries)
969 				{
970 					for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++)
971 					{
972 						Query *query = *q;
973 
974 						switch(query->type)
975 						{
976 						case Query::FRAGMENTS_PASSED:
977 							for(int cluster = 0; cluster < clusterCount; cluster++)
978 							{
979 								atomicAdd((volatile int*)&query->data, data.occlusion[cluster]);
980 							}
981 							break;
982 						case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN:
983 							atomicAdd((volatile int*)&query->data, processedPrimitives);
984 							break;
985 						default:
986 							break;
987 						}
988 
989 						atomicDecrement(&query->reference);
990 					}
991 
992 					delete draw.queries;
993 					draw.queries = 0;
994 				}
995 
996 				for(int i = 0; i < RENDERTARGETS; i++)
997 				{
998 					if(draw.renderTarget[i])
999 					{
1000 						draw.renderTarget[i]->unlockInternal();
1001 					}
1002 				}
1003 
1004 				if(draw.depthBuffer)
1005 				{
1006 					draw.depthBuffer->unlockInternal();
1007 				}
1008 
1009 				if(draw.stencilBuffer)
1010 				{
1011 					draw.stencilBuffer->unlockStencil();
1012 				}
1013 
1014 				for(int i = 0; i < TOTAL_IMAGE_UNITS; i++)
1015 				{
1016 					if(draw.texture[i])
1017 					{
1018 						draw.texture[i]->unlock();
1019 					}
1020 				}
1021 
1022 				for(int i = 0; i < MAX_VERTEX_INPUTS; i++)
1023 				{
1024 					if(draw.vertexStream[i])
1025 					{
1026 						draw.vertexStream[i]->unlock();
1027 					}
1028 				}
1029 
1030 				if(draw.indexBuffer)
1031 				{
1032 					draw.indexBuffer->unlock();
1033 				}
1034 
1035 				for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++)
1036 				{
1037 					if(draw.pUniformBuffers[i])
1038 					{
1039 						draw.pUniformBuffers[i]->unlock();
1040 					}
1041 					if(draw.vUniformBuffers[i])
1042 					{
1043 						draw.vUniformBuffers[i]->unlock();
1044 					}
1045 				}
1046 
1047 				for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++)
1048 				{
1049 					if(draw.transformFeedbackBuffers[i])
1050 					{
1051 						draw.transformFeedbackBuffers[i]->unlock();
1052 					}
1053 				}
1054 
1055 				draw.vertexRoutine->unbind();
1056 				draw.setupRoutine->unbind();
1057 				draw.pixelRoutine->unbind();
1058 
1059 				sync->unlock();
1060 
1061 				draw.references = -1;
1062 				resumeApp->signal();
1063 			}
1064 		}
1065 
1066 		pixelProgress[cluster].executing = false;
1067 	}
1068 
processPrimitiveVertices(int unit,unsigned int start,unsigned int triangleCount,unsigned int loop,int thread)1069 	void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread)
1070 	{
1071 		Triangle *triangle = triangleBatch[unit];
1072 		DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1073 		DrawData *data = draw->data;
1074 		VertexTask *task = vertexTask[thread];
1075 
1076 		const void *indices = data->indices;
1077 		VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer;
1078 
1079 		if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall)
1080 		{
1081 			task->vertexCache.clear();
1082 			task->vertexCache.drawCall = primitiveProgress[unit].drawCall;
1083 		}
1084 
1085 		unsigned int batch[128][3];   // FIXME: Adjust to dynamic batch size
1086 
1087 		switch(draw->drawType)
1088 		{
1089 		case DRAW_POINTLIST:
1090 			{
1091 				unsigned int index = start;
1092 
1093 				for(unsigned int i = 0; i < triangleCount; i++)
1094 				{
1095 					batch[i][0] = index;
1096 					batch[i][1] = index;
1097 					batch[i][2] = index;
1098 
1099 					index += 1;
1100 				}
1101 			}
1102 			break;
1103 		case DRAW_LINELIST:
1104 			{
1105 				unsigned int index = 2 * start;
1106 
1107 				for(unsigned int i = 0; i < triangleCount; i++)
1108 				{
1109 					batch[i][0] = index + 0;
1110 					batch[i][1] = index + 1;
1111 					batch[i][2] = index + 1;
1112 
1113 					index += 2;
1114 				}
1115 			}
1116 			break;
1117 		case DRAW_LINESTRIP:
1118 			{
1119 				unsigned int index = start;
1120 
1121 				for(unsigned int i = 0; i < triangleCount; i++)
1122 				{
1123 					batch[i][0] = index + 0;
1124 					batch[i][1] = index + 1;
1125 					batch[i][2] = index + 1;
1126 
1127 					index += 1;
1128 				}
1129 			}
1130 			break;
1131 		case DRAW_LINELOOP:
1132 			{
1133 				unsigned int index = start;
1134 
1135 				for(unsigned int i = 0; i < triangleCount; i++)
1136 				{
1137 					batch[i][0] = (index + 0) % loop;
1138 					batch[i][1] = (index + 1) % loop;
1139 					batch[i][2] = (index + 1) % loop;
1140 
1141 					index += 1;
1142 				}
1143 			}
1144 			break;
1145 		case DRAW_TRIANGLELIST:
1146 			{
1147 				unsigned int index = 3 * start;
1148 
1149 				for(unsigned int i = 0; i < triangleCount; i++)
1150 				{
1151 					batch[i][0] = index + 0;
1152 					batch[i][1] = index + 1;
1153 					batch[i][2] = index + 2;
1154 
1155 					index += 3;
1156 				}
1157 			}
1158 			break;
1159 		case DRAW_TRIANGLESTRIP:
1160 			{
1161 				unsigned int index = start;
1162 
1163 				for(unsigned int i = 0; i < triangleCount; i++)
1164 				{
1165 					batch[i][0] = index + 0;
1166 					batch[i][1] = index + (index & 1) + 1;
1167 					batch[i][2] = index + (~index & 1) + 1;
1168 
1169 					index += 1;
1170 				}
1171 			}
1172 			break;
1173 		case DRAW_TRIANGLEFAN:
1174 			{
1175 				unsigned int index = start;
1176 
1177 				for(unsigned int i = 0; i < triangleCount; i++)
1178 				{
1179 					batch[i][0] = index + 1;
1180 					batch[i][1] = index + 2;
1181 					batch[i][2] = 0;
1182 
1183 					index += 1;
1184 				}
1185 			}
1186 			break;
1187 		case DRAW_INDEXEDPOINTLIST8:
1188 			{
1189 				const unsigned char *index = (const unsigned char*)indices + start;
1190 
1191 				for(unsigned int i = 0; i < triangleCount; i++)
1192 				{
1193 					batch[i][0] = *index;
1194 					batch[i][1] = *index;
1195 					batch[i][2] = *index;
1196 
1197 					index += 1;
1198 				}
1199 			}
1200 			break;
1201 		case DRAW_INDEXEDPOINTLIST16:
1202 			{
1203 				const unsigned short *index = (const unsigned short*)indices + start;
1204 
1205 				for(unsigned int i = 0; i < triangleCount; i++)
1206 				{
1207 					batch[i][0] = *index;
1208 					batch[i][1] = *index;
1209 					batch[i][2] = *index;
1210 
1211 					index += 1;
1212 				}
1213 			}
1214 			break;
1215 		case DRAW_INDEXEDPOINTLIST32:
1216 			{
1217 				const unsigned int *index = (const unsigned int*)indices + start;
1218 
1219 				for(unsigned int i = 0; i < triangleCount; i++)
1220 				{
1221 					batch[i][0] = *index;
1222 					batch[i][1] = *index;
1223 					batch[i][2] = *index;
1224 
1225 					index += 1;
1226 				}
1227 			}
1228 			break;
1229 		case DRAW_INDEXEDLINELIST8:
1230 			{
1231 				const unsigned char *index = (const unsigned char*)indices + 2 * start;
1232 
1233 				for(unsigned int i = 0; i < triangleCount; i++)
1234 				{
1235 					batch[i][0] = index[0];
1236 					batch[i][1] = index[1];
1237 					batch[i][2] = index[1];
1238 
1239 					index += 2;
1240 				}
1241 			}
1242 			break;
1243 		case DRAW_INDEXEDLINELIST16:
1244 			{
1245 				const unsigned short *index = (const unsigned short*)indices + 2 * start;
1246 
1247 				for(unsigned int i = 0; i < triangleCount; i++)
1248 				{
1249 					batch[i][0] = index[0];
1250 					batch[i][1] = index[1];
1251 					batch[i][2] = index[1];
1252 
1253 					index += 2;
1254 				}
1255 			}
1256 			break;
1257 		case DRAW_INDEXEDLINELIST32:
1258 			{
1259 				const unsigned int *index = (const unsigned int*)indices + 2 * start;
1260 
1261 				for(unsigned int i = 0; i < triangleCount; i++)
1262 				{
1263 					batch[i][0] = index[0];
1264 					batch[i][1] = index[1];
1265 					batch[i][2] = index[1];
1266 
1267 					index += 2;
1268 				}
1269 			}
1270 			break;
1271 		case DRAW_INDEXEDLINESTRIP8:
1272 			{
1273 				const unsigned char *index = (const unsigned char*)indices + start;
1274 
1275 				for(unsigned int i = 0; i < triangleCount; i++)
1276 				{
1277 					batch[i][0] = index[0];
1278 					batch[i][1] = index[1];
1279 					batch[i][2] = index[1];
1280 
1281 					index += 1;
1282 				}
1283 			}
1284 			break;
1285 		case DRAW_INDEXEDLINESTRIP16:
1286 			{
1287 				const unsigned short *index = (const unsigned short*)indices + start;
1288 
1289 				for(unsigned int i = 0; i < triangleCount; i++)
1290 				{
1291 					batch[i][0] = index[0];
1292 					batch[i][1] = index[1];
1293 					batch[i][2] = index[1];
1294 
1295 					index += 1;
1296 				}
1297 			}
1298 			break;
1299 		case DRAW_INDEXEDLINESTRIP32:
1300 			{
1301 				const unsigned int *index = (const unsigned int*)indices + start;
1302 
1303 				for(unsigned int i = 0; i < triangleCount; i++)
1304 				{
1305 					batch[i][0] = index[0];
1306 					batch[i][1] = index[1];
1307 					batch[i][2] = index[1];
1308 
1309 					index += 1;
1310 				}
1311 			}
1312 			break;
1313 		case DRAW_INDEXEDLINELOOP8:
1314 			{
1315 				const unsigned char *index = (const unsigned char*)indices;
1316 
1317 				for(unsigned int i = 0; i < triangleCount; i++)
1318 				{
1319 					batch[i][0] = index[(start + i + 0) % loop];
1320 					batch[i][1] = index[(start + i + 1) % loop];
1321 					batch[i][2] = index[(start + i + 1) % loop];
1322 				}
1323 			}
1324 			break;
1325 		case DRAW_INDEXEDLINELOOP16:
1326 			{
1327 				const unsigned short *index = (const unsigned short*)indices;
1328 
1329 				for(unsigned int i = 0; i < triangleCount; i++)
1330 				{
1331 					batch[i][0] = index[(start + i + 0) % loop];
1332 					batch[i][1] = index[(start + i + 1) % loop];
1333 					batch[i][2] = index[(start + i + 1) % loop];
1334 				}
1335 			}
1336 			break;
1337 		case DRAW_INDEXEDLINELOOP32:
1338 			{
1339 				const unsigned int *index = (const unsigned int*)indices;
1340 
1341 				for(unsigned int i = 0; i < triangleCount; i++)
1342 				{
1343 					batch[i][0] = index[(start + i + 0) % loop];
1344 					batch[i][1] = index[(start + i + 1) % loop];
1345 					batch[i][2] = index[(start + i + 1) % loop];
1346 				}
1347 			}
1348 			break;
1349 		case DRAW_INDEXEDTRIANGLELIST8:
1350 			{
1351 				const unsigned char *index = (const unsigned char*)indices + 3 * start;
1352 
1353 				for(unsigned int i = 0; i < triangleCount; i++)
1354 				{
1355 					batch[i][0] = index[0];
1356 					batch[i][1] = index[1];
1357 					batch[i][2] = index[2];
1358 
1359 					index += 3;
1360 				}
1361 			}
1362 			break;
1363 		case DRAW_INDEXEDTRIANGLELIST16:
1364 			{
1365 				const unsigned short *index = (const unsigned short*)indices + 3 * start;
1366 
1367 				for(unsigned int i = 0; i < triangleCount; i++)
1368 				{
1369 					batch[i][0] = index[0];
1370 					batch[i][1] = index[1];
1371 					batch[i][2] = index[2];
1372 
1373 					index += 3;
1374 				}
1375 			}
1376 			break;
1377 		case DRAW_INDEXEDTRIANGLELIST32:
1378 			{
1379 				const unsigned int *index = (const unsigned int*)indices + 3 * start;
1380 
1381 				for(unsigned int i = 0; i < triangleCount; i++)
1382 				{
1383 					batch[i][0] = index[0];
1384 					batch[i][1] = index[1];
1385 					batch[i][2] = index[2];
1386 
1387 					index += 3;
1388 				}
1389 			}
1390 			break;
1391 		case DRAW_INDEXEDTRIANGLESTRIP8:
1392 			{
1393 				const unsigned char *index = (const unsigned char*)indices + start;
1394 
1395 				for(unsigned int i = 0; i < triangleCount; i++)
1396 				{
1397 					batch[i][0] = index[0];
1398 					batch[i][1] = index[((start + i) & 1) + 1];
1399 					batch[i][2] = index[(~(start + i) & 1) + 1];
1400 
1401 					index += 1;
1402 				}
1403 			}
1404 			break;
1405 		case DRAW_INDEXEDTRIANGLESTRIP16:
1406 			{
1407 				const unsigned short *index = (const unsigned short*)indices + start;
1408 
1409 				for(unsigned int i = 0; i < triangleCount; i++)
1410 				{
1411 					batch[i][0] = index[0];
1412 					batch[i][1] = index[((start + i) & 1) + 1];
1413 					batch[i][2] = index[(~(start + i) & 1) + 1];
1414 
1415 					index += 1;
1416 				}
1417 			}
1418 			break;
1419 		case DRAW_INDEXEDTRIANGLESTRIP32:
1420 			{
1421 				const unsigned int *index = (const unsigned int*)indices + start;
1422 
1423 				for(unsigned int i = 0; i < triangleCount; i++)
1424 				{
1425 					batch[i][0] = index[0];
1426 					batch[i][1] = index[((start + i) & 1) + 1];
1427 					batch[i][2] = index[(~(start + i) & 1) + 1];
1428 
1429 					index += 1;
1430 				}
1431 			}
1432 			break;
1433 		case DRAW_INDEXEDTRIANGLEFAN8:
1434 			{
1435 				const unsigned char *index = (const unsigned char*)indices;
1436 
1437 				for(unsigned int i = 0; i < triangleCount; i++)
1438 				{
1439 					batch[i][0] = index[start + i + 1];
1440 					batch[i][1] = index[start + i + 2];
1441 					batch[i][2] = index[0];
1442 				}
1443 			}
1444 			break;
1445 		case DRAW_INDEXEDTRIANGLEFAN16:
1446 			{
1447 				const unsigned short *index = (const unsigned short*)indices;
1448 
1449 				for(unsigned int i = 0; i < triangleCount; i++)
1450 				{
1451 					batch[i][0] = index[start + i + 1];
1452 					batch[i][1] = index[start + i + 2];
1453 					batch[i][2] = index[0];
1454 				}
1455 			}
1456 			break;
1457 		case DRAW_INDEXEDTRIANGLEFAN32:
1458 			{
1459 				const unsigned int *index = (const unsigned int*)indices;
1460 
1461 				for(unsigned int i = 0; i < triangleCount; i++)
1462 				{
1463 					batch[i][0] = index[start + i + 1];
1464 					batch[i][1] = index[start + i + 2];
1465 					batch[i][2] = index[0];
1466 				}
1467 			}
1468 			break;
1469 		case DRAW_QUADLIST:
1470 			{
1471 				unsigned int index = 4 * start / 2;
1472 
1473 				for(unsigned int i = 0; i < triangleCount; i += 2)
1474 				{
1475 					batch[i+0][0] = index + 0;
1476 					batch[i+0][1] = index + 1;
1477 					batch[i+0][2] = index + 2;
1478 
1479 					batch[i+1][0] = index + 0;
1480 					batch[i+1][1] = index + 2;
1481 					batch[i+1][2] = index + 3;
1482 
1483 					index += 4;
1484 				}
1485 			}
1486 			break;
1487 		default:
1488 			ASSERT(false);
1489 			return;
1490 		}
1491 
1492 		task->primitiveStart = start;
1493 		task->vertexCount = triangleCount * 3;
1494 		vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data);
1495 	}
1496 
setupSolidTriangles(int unit,int count)1497 	int Renderer::setupSolidTriangles(int unit, int count)
1498 	{
1499 		Triangle *triangle = triangleBatch[unit];
1500 		Primitive *primitive = primitiveBatch[unit];
1501 
1502 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1503 		SetupProcessor::State &state = draw.setupState;
1504 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1505 
1506 		int ms = state.multiSample;
1507 		int pos = state.positionRegister;
1508 		const DrawData *data = draw.data;
1509 		int visible = 0;
1510 
1511 		for(int i = 0; i < count; i++, triangle++)
1512 		{
1513 			Vertex &v0 = triangle->v0;
1514 			Vertex &v1 = triangle->v1;
1515 			Vertex &v2 = triangle->v2;
1516 
1517 			if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE)
1518 			{
1519 				Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]);
1520 
1521 				int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags;
1522 
1523 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1524 				{
1525 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1526 					{
1527 						continue;
1528 					}
1529 				}
1530 
1531 				if(setupRoutine(primitive, triangle, &polygon, data))
1532 				{
1533 					primitive += ms;
1534 					visible++;
1535 				}
1536 			}
1537 		}
1538 
1539 		return visible;
1540 	}
1541 
setupWireframeTriangle(int unit,int count)1542 	int Renderer::setupWireframeTriangle(int unit, int count)
1543 	{
1544 		Triangle *triangle = triangleBatch[unit];
1545 		Primitive *primitive = primitiveBatch[unit];
1546 		int visible = 0;
1547 
1548 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1549 		SetupProcessor::State &state = draw.setupState;
1550 
1551 		const Vertex &v0 = triangle[0].v0;
1552 		const Vertex &v1 = triangle[0].v1;
1553 		const Vertex &v2 = triangle[0].v2;
1554 
1555 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1556 
1557 		if(state.cullMode == CULL_CLOCKWISE)
1558 		{
1559 			if(d >= 0) return 0;
1560 		}
1561 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1562 		{
1563 			if(d <= 0) return 0;
1564 		}
1565 
1566 		// Copy attributes
1567 		triangle[1].v0 = v1;
1568 		triangle[1].v1 = v2;
1569 		triangle[2].v0 = v2;
1570 		triangle[2].v1 = v0;
1571 
1572 		if(state.color[0][0].flat)   // FIXME
1573 		{
1574 			for(int i = 0; i < 2; i++)
1575 			{
1576 				triangle[1].v0.C[i] = triangle[0].v0.C[i];
1577 				triangle[1].v1.C[i] = triangle[0].v0.C[i];
1578 				triangle[2].v0.C[i] = triangle[0].v0.C[i];
1579 				triangle[2].v1.C[i] = triangle[0].v0.C[i];
1580 			}
1581 		}
1582 
1583 		for(int i = 0; i < 3; i++)
1584 		{
1585 			if(setupLine(*primitive, *triangle, draw))
1586 			{
1587 				primitive->area = 0.5f * d;
1588 
1589 				primitive++;
1590 				visible++;
1591 			}
1592 
1593 			triangle++;
1594 		}
1595 
1596 		return visible;
1597 	}
1598 
setupVertexTriangle(int unit,int count)1599 	int Renderer::setupVertexTriangle(int unit, int count)
1600 	{
1601 		Triangle *triangle = triangleBatch[unit];
1602 		Primitive *primitive = primitiveBatch[unit];
1603 		int visible = 0;
1604 
1605 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1606 		SetupProcessor::State &state = draw.setupState;
1607 
1608 		const Vertex &v0 = triangle[0].v0;
1609 		const Vertex &v1 = triangle[0].v1;
1610 		const Vertex &v2 = triangle[0].v2;
1611 
1612 		float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w;
1613 
1614 		if(state.cullMode == CULL_CLOCKWISE)
1615 		{
1616 			if(d >= 0) return 0;
1617 		}
1618 		else if(state.cullMode == CULL_COUNTERCLOCKWISE)
1619 		{
1620 			if(d <= 0) return 0;
1621 		}
1622 
1623 		// Copy attributes
1624 		triangle[1].v0 = v1;
1625 		triangle[2].v0 = v2;
1626 
1627 		for(int i = 0; i < 3; i++)
1628 		{
1629 			if(setupPoint(*primitive, *triangle, draw))
1630 			{
1631 				primitive->area = 0.5f * d;
1632 
1633 				primitive++;
1634 				visible++;
1635 			}
1636 
1637 			triangle++;
1638 		}
1639 
1640 		return visible;
1641 	}
1642 
setupLines(int unit,int count)1643 	int Renderer::setupLines(int unit, int count)
1644 	{
1645 		Triangle *triangle = triangleBatch[unit];
1646 		Primitive *primitive = primitiveBatch[unit];
1647 		int visible = 0;
1648 
1649 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1650 		SetupProcessor::State &state = draw.setupState;
1651 
1652 		int ms = state.multiSample;
1653 
1654 		for(int i = 0; i < count; i++)
1655 		{
1656 			if(setupLine(*primitive, *triangle, draw))
1657 			{
1658 				primitive += ms;
1659 				visible++;
1660 			}
1661 
1662 			triangle++;
1663 		}
1664 
1665 		return visible;
1666 	}
1667 
setupPoints(int unit,int count)1668 	int Renderer::setupPoints(int unit, int count)
1669 	{
1670 		Triangle *triangle = triangleBatch[unit];
1671 		Primitive *primitive = primitiveBatch[unit];
1672 		int visible = 0;
1673 
1674 		DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT];
1675 		SetupProcessor::State &state = draw.setupState;
1676 
1677 		int ms = state.multiSample;
1678 
1679 		for(int i = 0; i < count; i++)
1680 		{
1681 			if(setupPoint(*primitive, *triangle, draw))
1682 			{
1683 				primitive += ms;
1684 				visible++;
1685 			}
1686 
1687 			triangle++;
1688 		}
1689 
1690 		return visible;
1691 	}
1692 
setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1693 	bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1694 	{
1695 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1696 		const SetupProcessor::State &state = draw.setupState;
1697 		const DrawData &data = *draw.data;
1698 
1699 		float lineWidth = data.lineWidth;
1700 
1701 		Vertex &v0 = triangle.v0;
1702 		Vertex &v1 = triangle.v1;
1703 
1704 		int pos = state.positionRegister;
1705 
1706 		const float4 &P0 = v0.v[pos];
1707 		const float4 &P1 = v1.v[pos];
1708 
1709 		if(P0.w <= 0 && P1.w <= 0)
1710 		{
1711 			return false;
1712 		}
1713 
1714 		const float W = data.Wx16[0] * (1.0f / 16.0f);
1715 		const float H = data.Hx16[0] * (1.0f / 16.0f);
1716 
1717 		float dx = W * (P1.x / P1.w - P0.x / P0.w);
1718 		float dy = H * (P1.y / P1.w - P0.y / P0.w);
1719 
1720 		if(dx == 0 && dy == 0)
1721 		{
1722 			return false;
1723 		}
1724 
1725 		if(false)   // Rectangle
1726 		{
1727 			float4 P[4];
1728 			int C[4];
1729 
1730 			P[0] = P0;
1731 			P[1] = P1;
1732 			P[2] = P1;
1733 			P[3] = P0;
1734 
1735 			float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy);
1736 
1737 			dx *= scale;
1738 			dy *= scale;
1739 
1740 			float dx0w = dx * P0.w / W;
1741 			float dy0h = dy * P0.w / H;
1742 			float dx0h = dx * P0.w / H;
1743 			float dy0w = dy * P0.w / W;
1744 
1745 			float dx1w = dx * P1.w / W;
1746 			float dy1h = dy * P1.w / H;
1747 			float dx1h = dx * P1.w / H;
1748 			float dy1w = dy * P1.w / W;
1749 
1750 			P[0].x += -dy0w + -dx0w;
1751 			P[0].y += -dx0h + +dy0h;
1752 			C[0] = clipper->computeClipFlags(P[0]);
1753 
1754 			P[1].x += -dy1w + +dx1w;
1755 			P[1].y += -dx1h + +dy1h;
1756 			C[1] = clipper->computeClipFlags(P[1]);
1757 
1758 			P[2].x += +dy1w + +dx1w;
1759 			P[2].y += +dx1h + -dy1h;
1760 			C[2] = clipper->computeClipFlags(P[2]);
1761 
1762 			P[3].x += +dy0w + -dx0w;
1763 			P[3].y += +dx0h + +dy0h;
1764 			C[3] = clipper->computeClipFlags(P[3]);
1765 
1766 			if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1767 			{
1768 				Polygon polygon(P, 4);
1769 
1770 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1771 
1772 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1773 				{
1774 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1775 					{
1776 						return false;
1777 					}
1778 				}
1779 
1780 				return setupRoutine(&primitive, &triangle, &polygon, &data);
1781 			}
1782 		}
1783 		else   // Diamond test convention
1784 		{
1785 			float4 P[8];
1786 			int C[8];
1787 
1788 			P[0] = P0;
1789 			P[1] = P0;
1790 			P[2] = P0;
1791 			P[3] = P0;
1792 			P[4] = P1;
1793 			P[5] = P1;
1794 			P[6] = P1;
1795 			P[7] = P1;
1796 
1797 			float dx0 = lineWidth * 0.5f * P0.w / W;
1798 			float dy0 = lineWidth * 0.5f * P0.w / H;
1799 
1800 			float dx1 = lineWidth * 0.5f * P1.w / W;
1801 			float dy1 = lineWidth * 0.5f * P1.w / H;
1802 
1803 			P[0].x += -dx0;
1804 			C[0] = clipper->computeClipFlags(P[0]);
1805 
1806 			P[1].y += +dy0;
1807 			C[1] = clipper->computeClipFlags(P[1]);
1808 
1809 			P[2].x += +dx0;
1810 			C[2] = clipper->computeClipFlags(P[2]);
1811 
1812 			P[3].y += -dy0;
1813 			C[3] = clipper->computeClipFlags(P[3]);
1814 
1815 			P[4].x += -dx1;
1816 			C[4] = clipper->computeClipFlags(P[4]);
1817 
1818 			P[5].y += +dy1;
1819 			C[5] = clipper->computeClipFlags(P[5]);
1820 
1821 			P[6].x += +dx1;
1822 			C[6] = clipper->computeClipFlags(P[6]);
1823 
1824 			P[7].y += -dy1;
1825 			C[7] = clipper->computeClipFlags(P[7]);
1826 
1827 			if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE)
1828 			{
1829 				float4 L[6];
1830 
1831 				if(dx > -dy)
1832 				{
1833 					if(dx > dy)   // Right
1834 					{
1835 						L[0] = P[0];
1836 						L[1] = P[1];
1837 						L[2] = P[5];
1838 						L[3] = P[6];
1839 						L[4] = P[7];
1840 						L[5] = P[3];
1841 					}
1842 					else   // Down
1843 					{
1844 						L[0] = P[0];
1845 						L[1] = P[4];
1846 						L[2] = P[5];
1847 						L[3] = P[6];
1848 						L[4] = P[2];
1849 						L[5] = P[3];
1850 					}
1851 				}
1852 				else
1853 				{
1854 					if(dx > dy)   // Up
1855 					{
1856 						L[0] = P[0];
1857 						L[1] = P[1];
1858 						L[2] = P[2];
1859 						L[3] = P[6];
1860 						L[4] = P[7];
1861 						L[5] = P[4];
1862 					}
1863 					else   // Left
1864 					{
1865 						L[0] = P[1];
1866 						L[1] = P[2];
1867 						L[2] = P[3];
1868 						L[3] = P[7];
1869 						L[4] = P[4];
1870 						L[5] = P[5];
1871 					}
1872 				}
1873 
1874 				Polygon polygon(L, 6);
1875 
1876 				int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags;
1877 
1878 				if(clipFlagsOr != Clipper::CLIP_FINITE)
1879 				{
1880 					if(!clipper->clip(polygon, clipFlagsOr, draw))
1881 					{
1882 						return false;
1883 					}
1884 				}
1885 
1886 				return setupRoutine(&primitive, &triangle, &polygon, &data);
1887 			}
1888 		}
1889 
1890 		return false;
1891 	}
1892 
setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1893 	bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw)
1894 	{
1895 		const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer;
1896 		const SetupProcessor::State &state = draw.setupState;
1897 		const DrawData &data = *draw.data;
1898 
1899 		Vertex &v = triangle.v0;
1900 
1901 		float pSize;
1902 
1903 		int pts = state.pointSizeRegister;
1904 
1905 		if(state.pointSizeRegister != Unused)
1906 		{
1907 			pSize = v.v[pts].y;
1908 		}
1909 		else
1910 		{
1911 			pSize = data.point.pointSize[0];
1912 		}
1913 
1914 		pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax);
1915 
1916 		float4 P[4];
1917 		int C[4];
1918 
1919 		int pos = state.positionRegister;
1920 
1921 		P[0] = v.v[pos];
1922 		P[1] = v.v[pos];
1923 		P[2] = v.v[pos];
1924 		P[3] = v.v[pos];
1925 
1926 		const float X = pSize * P[0].w * data.halfPixelX[0];
1927 		const float Y = pSize * P[0].w * data.halfPixelY[0];
1928 
1929 		P[0].x -= X;
1930 		P[0].y += Y;
1931 		C[0] = clipper->computeClipFlags(P[0]);
1932 
1933 		P[1].x += X;
1934 		P[1].y += Y;
1935 		C[1] = clipper->computeClipFlags(P[1]);
1936 
1937 		P[2].x += X;
1938 		P[2].y -= Y;
1939 		C[2] = clipper->computeClipFlags(P[2]);
1940 
1941 		P[3].x -= X;
1942 		P[3].y -= Y;
1943 		C[3] = clipper->computeClipFlags(P[3]);
1944 
1945 		triangle.v1 = triangle.v0;
1946 		triangle.v2 = triangle.v0;
1947 
1948 		triangle.v1.X += iround(16 * 0.5f * pSize);
1949 		triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1);   // Both Direct3D and OpenGL expect (0, 0) in the top-left corner
1950 
1951 		Polygon polygon(P, 4);
1952 
1953 		if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE)
1954 		{
1955 			int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags;
1956 
1957 			if(clipFlagsOr != Clipper::CLIP_FINITE)
1958 			{
1959 				if(!clipper->clip(polygon, clipFlagsOr, draw))
1960 				{
1961 					return false;
1962 				}
1963 			}
1964 
1965 			return setupRoutine(&primitive, &triangle, &polygon, &data);
1966 		}
1967 
1968 		return false;
1969 	}
1970 
initializeThreads()1971 	void Renderer::initializeThreads()
1972 	{
1973 		unitCount = ceilPow2(threadCount);
1974 		clusterCount = ceilPow2(threadCount);
1975 
1976 		for(int i = 0; i < unitCount; i++)
1977 		{
1978 			triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle));
1979 			primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive));
1980 		}
1981 
1982 		for(int i = 0; i < threadCount; i++)
1983 		{
1984 			vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask));
1985 			vertexTask[i]->vertexCache.drawCall = -1;
1986 
1987 			task[i].type = Task::SUSPEND;
1988 
1989 			resume[i] = new Event();
1990 			suspend[i] = new Event();
1991 
1992 			Parameters parameters;
1993 			parameters.threadIndex = i;
1994 			parameters.renderer = this;
1995 
1996 			exitThreads = false;
1997 			worker[i] = new Thread(threadFunction, &parameters);
1998 
1999 			suspend[i]->wait();
2000 			suspend[i]->signal();
2001 		}
2002 	}
2003 
terminateThreads()2004 	void Renderer::terminateThreads()
2005 	{
2006 		while(threadsAwake != 0)
2007 		{
2008 			Thread::sleep(1);
2009 		}
2010 
2011 		for(int thread = 0; thread < threadCount; thread++)
2012 		{
2013 			if(worker[thread])
2014 			{
2015 				exitThreads = true;
2016 				resume[thread]->signal();
2017 				worker[thread]->join();
2018 
2019 				delete worker[thread];
2020 				worker[thread] = 0;
2021 				delete resume[thread];
2022 				resume[thread] = 0;
2023 				delete suspend[thread];
2024 				suspend[thread] = 0;
2025 			}
2026 
2027 			deallocate(vertexTask[thread]);
2028 			vertexTask[thread] = 0;
2029 		}
2030 
2031 		for(int i = 0; i < 16; i++)
2032 		{
2033 			deallocate(triangleBatch[i]);
2034 			triangleBatch[i] = 0;
2035 
2036 			deallocate(primitiveBatch[i]);
2037 			primitiveBatch[i] = 0;
2038 		}
2039 	}
2040 
loadConstants(const VertexShader * vertexShader)2041 	void Renderer::loadConstants(const VertexShader *vertexShader)
2042 	{
2043 		if(!vertexShader) return;
2044 
2045 		size_t count = vertexShader->getLength();
2046 
2047 		for(size_t i = 0; i < count; i++)
2048 		{
2049 			const Shader::Instruction *instruction = vertexShader->getInstruction(i);
2050 
2051 			if(instruction->opcode == Shader::OPCODE_DEF)
2052 			{
2053 				int index = instruction->dst.index;
2054 				float value[4];
2055 
2056 				value[0] = instruction->src[0].value[0];
2057 				value[1] = instruction->src[0].value[1];
2058 				value[2] = instruction->src[0].value[2];
2059 				value[3] = instruction->src[0].value[3];
2060 
2061 				setVertexShaderConstantF(index, value);
2062 			}
2063 			else if(instruction->opcode == Shader::OPCODE_DEFI)
2064 			{
2065 				int index = instruction->dst.index;
2066 				int integer[4];
2067 
2068 				integer[0] = instruction->src[0].integer[0];
2069 				integer[1] = instruction->src[0].integer[1];
2070 				integer[2] = instruction->src[0].integer[2];
2071 				integer[3] = instruction->src[0].integer[3];
2072 
2073 				setVertexShaderConstantI(index, integer);
2074 			}
2075 			else if(instruction->opcode == Shader::OPCODE_DEFB)
2076 			{
2077 				int index = instruction->dst.index;
2078 				int boolean = instruction->src[0].boolean[0];
2079 
2080 				setVertexShaderConstantB(index, &boolean);
2081 			}
2082 		}
2083 	}
2084 
loadConstants(const PixelShader * pixelShader)2085 	void Renderer::loadConstants(const PixelShader *pixelShader)
2086 	{
2087 		if(!pixelShader) return;
2088 
2089 		size_t count = pixelShader->getLength();
2090 
2091 		for(size_t i = 0; i < count; i++)
2092 		{
2093 			const Shader::Instruction *instruction = pixelShader->getInstruction(i);
2094 
2095 			if(instruction->opcode == Shader::OPCODE_DEF)
2096 			{
2097 				int index = instruction->dst.index;
2098 				float value[4];
2099 
2100 				value[0] = instruction->src[0].value[0];
2101 				value[1] = instruction->src[0].value[1];
2102 				value[2] = instruction->src[0].value[2];
2103 				value[3] = instruction->src[0].value[3];
2104 
2105 				setPixelShaderConstantF(index, value);
2106 			}
2107 			else if(instruction->opcode == Shader::OPCODE_DEFI)
2108 			{
2109 				int index = instruction->dst.index;
2110 				int integer[4];
2111 
2112 				integer[0] = instruction->src[0].integer[0];
2113 				integer[1] = instruction->src[0].integer[1];
2114 				integer[2] = instruction->src[0].integer[2];
2115 				integer[3] = instruction->src[0].integer[3];
2116 
2117 				setPixelShaderConstantI(index, integer);
2118 			}
2119 			else if(instruction->opcode == Shader::OPCODE_DEFB)
2120 			{
2121 				int index = instruction->dst.index;
2122 				int boolean = instruction->src[0].boolean[0];
2123 
2124 				setPixelShaderConstantB(index, &boolean);
2125 			}
2126 		}
2127 	}
2128 
setIndexBuffer(Resource * indexBuffer)2129 	void Renderer::setIndexBuffer(Resource *indexBuffer)
2130 	{
2131 		context->indexBuffer = indexBuffer;
2132 	}
2133 
setMultiSampleMask(unsigned int mask)2134 	void Renderer::setMultiSampleMask(unsigned int mask)
2135 	{
2136 		context->sampleMask = mask;
2137 	}
2138 
setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)2139 	void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)
2140 	{
2141 		sw::transparencyAntialiasing = transparencyAntialiasing;
2142 	}
2143 
isReadWriteTexture(int sampler)2144 	bool Renderer::isReadWriteTexture(int sampler)
2145 	{
2146 		for(int index = 0; index < RENDERTARGETS; index++)
2147 		{
2148 			if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource())
2149 			{
2150 				return true;
2151 			}
2152 		}
2153 
2154 		if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource())
2155 		{
2156 			return true;
2157 		}
2158 
2159 		return false;
2160 	}
2161 
updateClipper()2162 	void Renderer::updateClipper()
2163 	{
2164 		if(updateClipPlanes)
2165 		{
2166 			if(VertexProcessor::isFixedFunction())   // User plane in world space
2167 			{
2168 				const Matrix &scissorWorld = getViewTransform();
2169 
2170 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0];
2171 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1];
2172 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2];
2173 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3];
2174 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4];
2175 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5];
2176 			}
2177 			else   // User plane in clip space
2178 			{
2179 				if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0];
2180 				if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1];
2181 				if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2];
2182 				if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3];
2183 				if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4];
2184 				if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5];
2185 			}
2186 
2187 			updateClipPlanes = false;
2188 		}
2189 	}
2190 
setTextureResource(unsigned int sampler,Resource * resource)2191 	void Renderer::setTextureResource(unsigned int sampler, Resource *resource)
2192 	{
2193 		ASSERT(sampler < TOTAL_IMAGE_UNITS);
2194 
2195 		context->texture[sampler] = resource;
2196 	}
2197 
setTextureLevel(unsigned int sampler,unsigned int face,unsigned int level,Surface * surface,TextureType type)2198 	void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type)
2199 	{
2200 		ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS);
2201 
2202 		context->sampler[sampler].setTextureLevel(face, level, surface, type);
2203 	}
2204 
setTextureFilter(SamplerType type,int sampler,FilterType textureFilter)2205 	void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter)
2206 	{
2207 		if(type == SAMPLER_PIXEL)
2208 		{
2209 			PixelProcessor::setTextureFilter(sampler, textureFilter);
2210 		}
2211 		else
2212 		{
2213 			VertexProcessor::setTextureFilter(sampler, textureFilter);
2214 		}
2215 	}
2216 
setMipmapFilter(SamplerType type,int sampler,MipmapType mipmapFilter)2217 	void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter)
2218 	{
2219 		if(type == SAMPLER_PIXEL)
2220 		{
2221 			PixelProcessor::setMipmapFilter(sampler, mipmapFilter);
2222 		}
2223 		else
2224 		{
2225 			VertexProcessor::setMipmapFilter(sampler, mipmapFilter);
2226 		}
2227 	}
2228 
setGatherEnable(SamplerType type,int sampler,bool enable)2229 	void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable)
2230 	{
2231 		if(type == SAMPLER_PIXEL)
2232 		{
2233 			PixelProcessor::setGatherEnable(sampler, enable);
2234 		}
2235 		else
2236 		{
2237 			VertexProcessor::setGatherEnable(sampler, enable);
2238 		}
2239 	}
2240 
setAddressingModeU(SamplerType type,int sampler,AddressingMode addressMode)2241 	void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode)
2242 	{
2243 		if(type == SAMPLER_PIXEL)
2244 		{
2245 			PixelProcessor::setAddressingModeU(sampler, addressMode);
2246 		}
2247 		else
2248 		{
2249 			VertexProcessor::setAddressingModeU(sampler, addressMode);
2250 		}
2251 	}
2252 
setAddressingModeV(SamplerType type,int sampler,AddressingMode addressMode)2253 	void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode)
2254 	{
2255 		if(type == SAMPLER_PIXEL)
2256 		{
2257 			PixelProcessor::setAddressingModeV(sampler, addressMode);
2258 		}
2259 		else
2260 		{
2261 			VertexProcessor::setAddressingModeV(sampler, addressMode);
2262 		}
2263 	}
2264 
setAddressingModeW(SamplerType type,int sampler,AddressingMode addressMode)2265 	void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode)
2266 	{
2267 		if(type == SAMPLER_PIXEL)
2268 		{
2269 			PixelProcessor::setAddressingModeW(sampler, addressMode);
2270 		}
2271 		else
2272 		{
2273 			VertexProcessor::setAddressingModeW(sampler, addressMode);
2274 		}
2275 	}
2276 
setReadSRGB(SamplerType type,int sampler,bool sRGB)2277 	void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB)
2278 	{
2279 		if(type == SAMPLER_PIXEL)
2280 		{
2281 			PixelProcessor::setReadSRGB(sampler, sRGB);
2282 		}
2283 		else
2284 		{
2285 			VertexProcessor::setReadSRGB(sampler, sRGB);
2286 		}
2287 	}
2288 
setMipmapLOD(SamplerType type,int sampler,float bias)2289 	void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias)
2290 	{
2291 		if(type == SAMPLER_PIXEL)
2292 		{
2293 			PixelProcessor::setMipmapLOD(sampler, bias);
2294 		}
2295 		else
2296 		{
2297 			VertexProcessor::setMipmapLOD(sampler, bias);
2298 		}
2299 	}
2300 
setBorderColor(SamplerType type,int sampler,const Color<float> & borderColor)2301 	void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor)
2302 	{
2303 		if(type == SAMPLER_PIXEL)
2304 		{
2305 			PixelProcessor::setBorderColor(sampler, borderColor);
2306 		}
2307 		else
2308 		{
2309 			VertexProcessor::setBorderColor(sampler, borderColor);
2310 		}
2311 	}
2312 
setMaxAnisotropy(SamplerType type,int sampler,float maxAnisotropy)2313 	void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy)
2314 	{
2315 		if(type == SAMPLER_PIXEL)
2316 		{
2317 			PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2318 		}
2319 		else
2320 		{
2321 			VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy);
2322 		}
2323 	}
2324 
setHighPrecisionFiltering(SamplerType type,int sampler,bool highPrecisionFiltering)2325 	void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering)
2326 	{
2327 		if(type == SAMPLER_PIXEL)
2328 		{
2329 			PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2330 		}
2331 		else
2332 		{
2333 			VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering);
2334 		}
2335 	}
2336 
setSwizzleR(SamplerType type,int sampler,SwizzleType swizzleR)2337 	void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR)
2338 	{
2339 		if(type == SAMPLER_PIXEL)
2340 		{
2341 			PixelProcessor::setSwizzleR(sampler, swizzleR);
2342 		}
2343 		else
2344 		{
2345 			VertexProcessor::setSwizzleR(sampler, swizzleR);
2346 		}
2347 	}
2348 
setSwizzleG(SamplerType type,int sampler,SwizzleType swizzleG)2349 	void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG)
2350 	{
2351 		if(type == SAMPLER_PIXEL)
2352 		{
2353 			PixelProcessor::setSwizzleG(sampler, swizzleG);
2354 		}
2355 		else
2356 		{
2357 			VertexProcessor::setSwizzleG(sampler, swizzleG);
2358 		}
2359 	}
2360 
setSwizzleB(SamplerType type,int sampler,SwizzleType swizzleB)2361 	void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB)
2362 	{
2363 		if(type == SAMPLER_PIXEL)
2364 		{
2365 			PixelProcessor::setSwizzleB(sampler, swizzleB);
2366 		}
2367 		else
2368 		{
2369 			VertexProcessor::setSwizzleB(sampler, swizzleB);
2370 		}
2371 	}
2372 
setSwizzleA(SamplerType type,int sampler,SwizzleType swizzleA)2373 	void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA)
2374 	{
2375 		if(type == SAMPLER_PIXEL)
2376 		{
2377 			PixelProcessor::setSwizzleA(sampler, swizzleA);
2378 		}
2379 		else
2380 		{
2381 			VertexProcessor::setSwizzleA(sampler, swizzleA);
2382 		}
2383 	}
2384 
setBaseLevel(SamplerType type,int sampler,int baseLevel)2385 	void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel)
2386 	{
2387 		if(type == SAMPLER_PIXEL)
2388 		{
2389 			PixelProcessor::setBaseLevel(sampler, baseLevel);
2390 		}
2391 		else
2392 		{
2393 			VertexProcessor::setBaseLevel(sampler, baseLevel);
2394 		}
2395 	}
2396 
setMaxLevel(SamplerType type,int sampler,int maxLevel)2397 	void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel)
2398 	{
2399 		if(type == SAMPLER_PIXEL)
2400 		{
2401 			PixelProcessor::setMaxLevel(sampler, maxLevel);
2402 		}
2403 		else
2404 		{
2405 			VertexProcessor::setMaxLevel(sampler, maxLevel);
2406 		}
2407 	}
2408 
setMinLod(SamplerType type,int sampler,float minLod)2409 	void Renderer::setMinLod(SamplerType type, int sampler, float minLod)
2410 	{
2411 		if(type == SAMPLER_PIXEL)
2412 		{
2413 			PixelProcessor::setMinLod(sampler, minLod);
2414 		}
2415 		else
2416 		{
2417 			VertexProcessor::setMinLod(sampler, minLod);
2418 		}
2419 	}
2420 
setMaxLod(SamplerType type,int sampler,float maxLod)2421 	void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod)
2422 	{
2423 		if(type == SAMPLER_PIXEL)
2424 		{
2425 			PixelProcessor::setMaxLod(sampler, maxLod);
2426 		}
2427 		else
2428 		{
2429 			VertexProcessor::setMaxLod(sampler, maxLod);
2430 		}
2431 	}
2432 
setPointSpriteEnable(bool pointSpriteEnable)2433 	void Renderer::setPointSpriteEnable(bool pointSpriteEnable)
2434 	{
2435 		context->setPointSpriteEnable(pointSpriteEnable);
2436 	}
2437 
setPointScaleEnable(bool pointScaleEnable)2438 	void Renderer::setPointScaleEnable(bool pointScaleEnable)
2439 	{
2440 		context->setPointScaleEnable(pointScaleEnable);
2441 	}
2442 
setLineWidth(float width)2443 	void Renderer::setLineWidth(float width)
2444 	{
2445 		context->lineWidth = width;
2446 	}
2447 
setDepthBias(float bias)2448 	void Renderer::setDepthBias(float bias)
2449 	{
2450 		depthBias = bias;
2451 	}
2452 
setSlopeDepthBias(float slopeBias)2453 	void Renderer::setSlopeDepthBias(float slopeBias)
2454 	{
2455 		slopeDepthBias = slopeBias;
2456 	}
2457 
setRasterizerDiscard(bool rasterizerDiscard)2458 	void Renderer::setRasterizerDiscard(bool rasterizerDiscard)
2459 	{
2460 		context->rasterizerDiscard = rasterizerDiscard;
2461 	}
2462 
setPixelShader(const PixelShader * shader)2463 	void Renderer::setPixelShader(const PixelShader *shader)
2464 	{
2465 		context->pixelShader = shader;
2466 
2467 		loadConstants(shader);
2468 	}
2469 
setVertexShader(const VertexShader * shader)2470 	void Renderer::setVertexShader(const VertexShader *shader)
2471 	{
2472 		context->vertexShader = shader;
2473 
2474 		loadConstants(shader);
2475 	}
2476 
setPixelShaderConstantF(unsigned int index,const float value[4],unsigned int count)2477 	void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2478 	{
2479 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2480 		{
2481 			if(drawCall[i]->psDirtyConstF < index + count)
2482 			{
2483 				drawCall[i]->psDirtyConstF = index + count;
2484 			}
2485 		}
2486 
2487 		for(unsigned int i = 0; i < count; i++)
2488 		{
2489 			PixelProcessor::setFloatConstant(index + i, value);
2490 			value += 4;
2491 		}
2492 	}
2493 
setPixelShaderConstantI(unsigned int index,const int value[4],unsigned int count)2494 	void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2495 	{
2496 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2497 		{
2498 			if(drawCall[i]->psDirtyConstI < index + count)
2499 			{
2500 				drawCall[i]->psDirtyConstI = index + count;
2501 			}
2502 		}
2503 
2504 		for(unsigned int i = 0; i < count; i++)
2505 		{
2506 			PixelProcessor::setIntegerConstant(index + i, value);
2507 			value += 4;
2508 		}
2509 	}
2510 
setPixelShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2511 	void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2512 	{
2513 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2514 		{
2515 			if(drawCall[i]->psDirtyConstB < index + count)
2516 			{
2517 				drawCall[i]->psDirtyConstB = index + count;
2518 			}
2519 		}
2520 
2521 		for(unsigned int i = 0; i < count; i++)
2522 		{
2523 			PixelProcessor::setBooleanConstant(index + i, *boolean);
2524 			boolean++;
2525 		}
2526 	}
2527 
setVertexShaderConstantF(unsigned int index,const float value[4],unsigned int count)2528 	void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count)
2529 	{
2530 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2531 		{
2532 			if(drawCall[i]->vsDirtyConstF < index + count)
2533 			{
2534 				drawCall[i]->vsDirtyConstF = index + count;
2535 			}
2536 		}
2537 
2538 		for(unsigned int i = 0; i < count; i++)
2539 		{
2540 			VertexProcessor::setFloatConstant(index + i, value);
2541 			value += 4;
2542 		}
2543 	}
2544 
setVertexShaderConstantI(unsigned int index,const int value[4],unsigned int count)2545 	void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count)
2546 	{
2547 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2548 		{
2549 			if(drawCall[i]->vsDirtyConstI < index + count)
2550 			{
2551 				drawCall[i]->vsDirtyConstI = index + count;
2552 			}
2553 		}
2554 
2555 		for(unsigned int i = 0; i < count; i++)
2556 		{
2557 			VertexProcessor::setIntegerConstant(index + i, value);
2558 			value += 4;
2559 		}
2560 	}
2561 
setVertexShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2562 	void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count)
2563 	{
2564 		for(unsigned int i = 0; i < DRAW_COUNT; i++)
2565 		{
2566 			if(drawCall[i]->vsDirtyConstB < index + count)
2567 			{
2568 				drawCall[i]->vsDirtyConstB = index + count;
2569 			}
2570 		}
2571 
2572 		for(unsigned int i = 0; i < count; i++)
2573 		{
2574 			VertexProcessor::setBooleanConstant(index + i, *boolean);
2575 			boolean++;
2576 		}
2577 	}
2578 
setModelMatrix(const Matrix & M,int i)2579 	void Renderer::setModelMatrix(const Matrix &M, int i)
2580 	{
2581 		VertexProcessor::setModelMatrix(M, i);
2582 	}
2583 
setViewMatrix(const Matrix & V)2584 	void Renderer::setViewMatrix(const Matrix &V)
2585 	{
2586 		VertexProcessor::setViewMatrix(V);
2587 		updateClipPlanes = true;
2588 	}
2589 
setBaseMatrix(const Matrix & B)2590 	void Renderer::setBaseMatrix(const Matrix &B)
2591 	{
2592 		VertexProcessor::setBaseMatrix(B);
2593 		updateClipPlanes = true;
2594 	}
2595 
setProjectionMatrix(const Matrix & P)2596 	void Renderer::setProjectionMatrix(const Matrix &P)
2597 	{
2598 		VertexProcessor::setProjectionMatrix(P);
2599 		updateClipPlanes = true;
2600 	}
2601 
addQuery(Query * query)2602 	void Renderer::addQuery(Query *query)
2603 	{
2604 		queries.push_back(query);
2605 	}
2606 
removeQuery(Query * query)2607 	void Renderer::removeQuery(Query *query)
2608 	{
2609 		queries.remove(query);
2610 	}
2611 
2612 	#if PERF_HUD
getThreadCount()2613 		int Renderer::getThreadCount()
2614 		{
2615 			return threadCount;
2616 		}
2617 
getVertexTime(int thread)2618 		int64_t Renderer::getVertexTime(int thread)
2619 		{
2620 			return vertexTime[thread];
2621 		}
2622 
getSetupTime(int thread)2623 		int64_t Renderer::getSetupTime(int thread)
2624 		{
2625 			return setupTime[thread];
2626 		}
2627 
getPixelTime(int thread)2628 		int64_t Renderer::getPixelTime(int thread)
2629 		{
2630 			return pixelTime[thread];
2631 		}
2632 
resetTimers()2633 		void Renderer::resetTimers()
2634 		{
2635 			for(int thread = 0; thread < threadCount; thread++)
2636 			{
2637 				vertexTime[thread] = 0;
2638 				setupTime[thread] = 0;
2639 				pixelTime[thread] = 0;
2640 			}
2641 		}
2642 	#endif
2643 
setViewport(const Viewport & viewport)2644 	void Renderer::setViewport(const Viewport &viewport)
2645 	{
2646 		this->viewport = viewport;
2647 	}
2648 
setScissor(const Rect & scissor)2649 	void Renderer::setScissor(const Rect &scissor)
2650 	{
2651 		this->scissor = scissor;
2652 	}
2653 
setClipFlags(int flags)2654 	void Renderer::setClipFlags(int flags)
2655 	{
2656 		clipFlags = flags << 8;   // Bottom 8 bits used by legacy frustum
2657 	}
2658 
setClipPlane(unsigned int index,const float plane[4])2659 	void Renderer::setClipPlane(unsigned int index, const float plane[4])
2660 	{
2661 		if(index < MAX_CLIP_PLANES)
2662 		{
2663 			userPlane[index] = plane;
2664 		}
2665 		else ASSERT(false);
2666 
2667 		updateClipPlanes = true;
2668 	}
2669 
updateConfiguration(bool initialUpdate)2670 	void Renderer::updateConfiguration(bool initialUpdate)
2671 	{
2672 		bool newConfiguration = swiftConfig->hasNewConfiguration();
2673 
2674 		if(newConfiguration || initialUpdate)
2675 		{
2676 			terminateThreads();
2677 
2678 			SwiftConfig::Configuration configuration = {};
2679 			swiftConfig->getConfiguration(configuration);
2680 
2681 			precacheVertex = !newConfiguration && configuration.precache;
2682 			precacheSetup = !newConfiguration && configuration.precache;
2683 			precachePixel = !newConfiguration && configuration.precache;
2684 
2685 			VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize);
2686 			PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize);
2687 			SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize);
2688 
2689 			switch(configuration.textureSampleQuality)
2690 			{
2691 			case 0:  Sampler::setFilterQuality(FILTER_POINT);       break;
2692 			case 1:  Sampler::setFilterQuality(FILTER_LINEAR);      break;
2693 			case 2:  Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2694 			default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break;
2695 			}
2696 
2697 			switch(configuration.mipmapQuality)
2698 			{
2699 			case 0:  Sampler::setMipmapQuality(MIPMAP_POINT);  break;
2700 			case 1:  Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2701 			default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break;
2702 			}
2703 
2704 			setPerspectiveCorrection(configuration.perspectiveCorrection);
2705 
2706 			switch(configuration.transcendentalPrecision)
2707 			{
2708 			case 0:
2709 				logPrecision = APPROXIMATE;
2710 				expPrecision = APPROXIMATE;
2711 				rcpPrecision = APPROXIMATE;
2712 				rsqPrecision = APPROXIMATE;
2713 				break;
2714 			case 1:
2715 				logPrecision = PARTIAL;
2716 				expPrecision = PARTIAL;
2717 				rcpPrecision = PARTIAL;
2718 				rsqPrecision = PARTIAL;
2719 				break;
2720 			case 2:
2721 				logPrecision = ACCURATE;
2722 				expPrecision = ACCURATE;
2723 				rcpPrecision = ACCURATE;
2724 				rsqPrecision = ACCURATE;
2725 				break;
2726 			case 3:
2727 				logPrecision = WHQL;
2728 				expPrecision = WHQL;
2729 				rcpPrecision = WHQL;
2730 				rsqPrecision = WHQL;
2731 				break;
2732 			case 4:
2733 				logPrecision = IEEE;
2734 				expPrecision = IEEE;
2735 				rcpPrecision = IEEE;
2736 				rsqPrecision = IEEE;
2737 				break;
2738 			default:
2739 				logPrecision = ACCURATE;
2740 				expPrecision = ACCURATE;
2741 				rcpPrecision = ACCURATE;
2742 				rsqPrecision = ACCURATE;
2743 				break;
2744 			}
2745 
2746 			switch(configuration.transparencyAntialiasing)
2747 			{
2748 			case 0:  transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2749 			case 1:  transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break;
2750 			default: transparencyAntialiasing = TRANSPARENCY_NONE;              break;
2751 			}
2752 
2753 			switch(configuration.threadCount)
2754 			{
2755 			case -1: threadCount = CPUID::coreCount();        break;
2756 			case 0:  threadCount = CPUID::processAffinity();  break;
2757 			default: threadCount = configuration.threadCount; break;
2758 			}
2759 
2760 			CPUID::setEnableSSE4_1(configuration.enableSSE4_1);
2761 			CPUID::setEnableSSSE3(configuration.enableSSSE3);
2762 			CPUID::setEnableSSE3(configuration.enableSSE3);
2763 			CPUID::setEnableSSE2(configuration.enableSSE2);
2764 			CPUID::setEnableSSE(configuration.enableSSE);
2765 
2766 			for(int pass = 0; pass < 10; pass++)
2767 			{
2768 				optimization[pass] = configuration.optimization[pass];
2769 			}
2770 
2771 			forceWindowed = configuration.forceWindowed;
2772 			complementaryDepthBuffer = configuration.complementaryDepthBuffer;
2773 			postBlendSRGB = configuration.postBlendSRGB;
2774 			exactColorRounding = configuration.exactColorRounding;
2775 			forceClearRegisters = configuration.forceClearRegisters;
2776 
2777 		#ifndef NDEBUG
2778 			minPrimitives = configuration.minPrimitives;
2779 			maxPrimitives = configuration.maxPrimitives;
2780 		#endif
2781 		}
2782 
2783 		if(!initialUpdate && !worker[0])
2784 		{
2785 			initializeThreads();
2786 		}
2787 	}
2788 }
2789