1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Renderer.hpp" 16 17 #include "Clipper.hpp" 18 #include "Surface.hpp" 19 #include "Primitive.hpp" 20 #include "Polygon.hpp" 21 #include "Main/FrameBuffer.hpp" 22 #include "Main/SwiftConfig.hpp" 23 #include "Reactor/Reactor.hpp" 24 #include "Shader/Constants.hpp" 25 #include "Common/MutexLock.hpp" 26 #include "Common/CPUID.hpp" 27 #include "Common/Memory.hpp" 28 #include "Common/Resource.hpp" 29 #include "Common/Half.hpp" 30 #include "Common/Math.hpp" 31 #include "Common/Timer.hpp" 32 #include "Common/Debug.hpp" 33 34 #undef max 35 36 bool disableServer = true; 37 38 #ifndef NDEBUG 39 unsigned int minPrimitives = 1; 40 unsigned int maxPrimitives = 1 << 21; 41 #endif 42 43 namespace sw 44 { 45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 46 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] 47 extern bool booleanFaceRegister; 48 extern bool fullPixelPositionRegister; 49 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last 50 extern bool secondaryColor; // Specular lighting is applied after texturing 51 extern bool colorsDefaultToZero; 52 53 extern bool forceWindowed; 54 extern bool complementaryDepthBuffer; 55 extern bool postBlendSRGB; 56 extern bool exactColorRounding; 57 extern TransparencyAntialiasing transparencyAntialiasing; 58 extern bool forceClearRegisters; 59 60 extern bool precacheVertex; 61 extern bool precacheSetup; 62 extern bool precachePixel; 63 64 static const int batchSize = 128; 65 AtomicInt threadCount(1); 66 AtomicInt Renderer::unitCount(1); 67 AtomicInt Renderer::clusterCount(1); 68 69 TranscendentalPrecision logPrecision = ACCURATE; 70 TranscendentalPrecision expPrecision = ACCURATE; 71 TranscendentalPrecision rcpPrecision = ACCURATE; 72 TranscendentalPrecision rsqPrecision = ACCURATE; 73 bool perspectiveCorrection = true; 74 setGlobalRenderingSettings(Conventions conventions,bool exactColorRounding)75 static void setGlobalRenderingSettings(Conventions conventions, bool exactColorRounding) 76 { 77 static bool initialized = false; 78 79 if(!initialized) 80 { 81 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates; 82 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth; 83 sw::booleanFaceRegister = conventions.booleanFaceRegister; 84 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister; 85 sw::leadingVertexFirst = conventions.leadingVertexFirst; 86 sw::secondaryColor = conventions.secondaryColor; 87 sw::colorsDefaultToZero = conventions.colorsDefaultToZero; 88 sw::exactColorRounding = exactColorRounding; 89 initialized = true; 90 } 91 } 92 93 struct Parameters 94 { 95 Renderer *renderer; 96 int threadIndex; 97 }; 98 Query(Type type)99 Query::Query(Type type) : building(false), data(0), type(type), reference(1) 100 { 101 } 102 addRef()103 void Query::addRef() 104 { 105 ++reference; // Atomic 106 } 107 release()108 void Query::release() 109 { 110 int ref = reference--; // Atomic 111 112 ASSERT(ref >= 0); 113 114 if(ref == 0) 115 { 116 delete this; 117 } 118 } 119 DrawCall()120 DrawCall::DrawCall() 121 { 122 queries = 0; 123 124 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 125 vsDirtyConstI = 16; 126 vsDirtyConstB = 16; 127 128 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS; 129 psDirtyConstI = 16; 130 psDirtyConstB = 16; 131 132 references = -1; 133 134 data = (DrawData*)allocate(sizeof(DrawData)); 135 data->constants = &constants; 136 } 137 ~DrawCall()138 DrawCall::~DrawCall() 139 { 140 delete queries; 141 142 deallocate(data); 143 } 144 Renderer(Context * context,Conventions conventions,bool exactColorRounding)145 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport() 146 { 147 setGlobalRenderingSettings(conventions, exactColorRounding); 148 149 setRenderTarget(0, 0); 150 clipper = new Clipper(symmetricNormalizedDepth); 151 blitter = new Blitter; 152 153 updateViewMatrix = true; 154 updateBaseMatrix = true; 155 updateProjectionMatrix = true; 156 updateClipPlanes = true; 157 158 #if PERF_HUD 159 resetTimers(); 160 #endif 161 162 for(int i = 0; i < 16; i++) 163 { 164 vertexTask[i] = 0; 165 166 worker[i] = 0; 167 resume[i] = 0; 168 suspend[i] = 0; 169 } 170 171 threadsAwake = 0; 172 resumeApp = new Event(); 173 174 currentDraw = 0; 175 nextDraw = 0; 176 177 qHead = 0; 178 qSize = 0; 179 180 for(int i = 0; i < 16; i++) 181 { 182 triangleBatch[i] = 0; 183 primitiveBatch[i] = 0; 184 } 185 186 for(int draw = 0; draw < DRAW_COUNT; draw++) 187 { 188 drawCall[draw] = new DrawCall(); 189 drawList[draw] = drawCall[draw]; 190 } 191 192 for(int unit = 0; unit < 16; unit++) 193 { 194 primitiveProgress[unit].init(); 195 } 196 197 for(int cluster = 0; cluster < 16; cluster++) 198 { 199 pixelProgress[cluster].init(); 200 } 201 202 clipFlags = 0; 203 204 swiftConfig = new SwiftConfig(disableServer); 205 updateConfiguration(true); 206 207 sync = new Resource(0); 208 } 209 ~Renderer()210 Renderer::~Renderer() 211 { 212 sync->lock(EXCLUSIVE); 213 sync->destruct(); 214 terminateThreads(); 215 sync->unlock(); 216 217 delete clipper; 218 clipper = nullptr; 219 220 delete blitter; 221 blitter = nullptr; 222 223 delete resumeApp; 224 resumeApp = nullptr; 225 226 for(int draw = 0; draw < DRAW_COUNT; draw++) 227 { 228 delete drawCall[draw]; 229 drawCall[draw] = nullptr; 230 } 231 232 delete swiftConfig; 233 swiftConfig = nullptr; 234 } 235 236 // This object has to be mem aligned operator new(size_t size)237 void* Renderer::operator new(size_t size) 238 { 239 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class 240 return sw::allocate(sizeof(Renderer), 16); 241 } 242 operator delete(void * mem)243 void Renderer::operator delete(void * mem) 244 { 245 sw::deallocate(mem); 246 } 247 draw(DrawType drawType,unsigned int indexOffset,unsigned int count,bool update)248 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update) 249 { 250 #ifndef NDEBUG 251 if(count < minPrimitives || count > maxPrimitives) 252 { 253 return; 254 } 255 #endif 256 257 context->drawType = drawType; 258 259 updateConfiguration(); 260 updateClipper(); 261 262 int ss = context->getSuperSampleCount(); 263 int ms = context->getMultiSampleCount(); 264 bool requiresSync = false; 265 266 for(int q = 0; q < ss; q++) 267 { 268 unsigned int oldMultiSampleMask = context->multiSampleMask; 269 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms)); 270 271 if(!context->multiSampleMask) 272 { 273 continue; 274 } 275 276 sync->lock(sw::PRIVATE); 277 278 if(update || oldMultiSampleMask != context->multiSampleMask) 279 { 280 vertexState = VertexProcessor::update(drawType); 281 setupState = SetupProcessor::update(); 282 pixelState = PixelProcessor::update(); 283 284 vertexRoutine = VertexProcessor::routine(vertexState); 285 setupRoutine = SetupProcessor::routine(setupState); 286 pixelRoutine = PixelProcessor::routine(pixelState); 287 } 288 289 int batch = batchSize / ms; 290 291 int (Renderer::*setupPrimitives)(int batch, int count); 292 293 if(context->isDrawTriangle()) 294 { 295 switch(context->fillMode) 296 { 297 case FILL_SOLID: 298 setupPrimitives = &Renderer::setupSolidTriangles; 299 break; 300 case FILL_WIREFRAME: 301 setupPrimitives = &Renderer::setupWireframeTriangle; 302 batch = 1; 303 break; 304 case FILL_VERTEX: 305 setupPrimitives = &Renderer::setupVertexTriangle; 306 batch = 1; 307 break; 308 default: 309 ASSERT(false); 310 return; 311 } 312 } 313 else if(context->isDrawLine()) 314 { 315 setupPrimitives = &Renderer::setupLines; 316 } 317 else // Point draw 318 { 319 setupPrimitives = &Renderer::setupPoints; 320 } 321 322 DrawCall *draw = nullptr; 323 324 do 325 { 326 for(int i = 0; i < DRAW_COUNT; i++) 327 { 328 if(drawCall[i]->references == -1) 329 { 330 draw = drawCall[i]; 331 drawList[nextDraw & DRAW_COUNT_BITS] = draw; 332 333 break; 334 } 335 } 336 337 if(!draw) 338 { 339 resumeApp->wait(); 340 } 341 } 342 while(!draw); 343 344 DrawData *data = draw->data; 345 346 if(queries.size() != 0) 347 { 348 draw->queries = new std::list<Query*>(); 349 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; 350 for(auto &query : queries) 351 { 352 if(includePrimitivesWrittenQueries || (query->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) 353 { 354 query->addRef(); 355 draw->queries->push_back(query); 356 } 357 } 358 } 359 360 draw->drawType = drawType; 361 draw->batchSize = batch; 362 363 draw->vertexRoutine = vertexRoutine; 364 draw->setupRoutine = setupRoutine; 365 draw->pixelRoutine = pixelRoutine; 366 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); 367 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); 368 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); 369 draw->setupPrimitives = setupPrimitives; 370 draw->setupState = setupState; 371 372 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 373 { 374 draw->vertexStream[i] = context->input[i].resource; 375 data->input[i] = context->input[i].buffer; 376 data->stride[i] = context->input[i].stride; 377 378 if(draw->vertexStream[i]) 379 { 380 draw->vertexStream[i]->lock(PUBLIC, PRIVATE); 381 } 382 } 383 384 if(context->indexBuffer) 385 { 386 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; 387 } 388 389 draw->indexBuffer = context->indexBuffer; 390 391 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) 392 { 393 draw->texture[sampler] = 0; 394 } 395 396 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) 397 { 398 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) 399 { 400 draw->texture[sampler] = context->texture[sampler]; 401 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets 402 403 data->mipmap[sampler] = context->sampler[sampler].getTextureData(); 404 405 requiresSync |= context->sampler[sampler].requiresSync(); 406 } 407 } 408 409 if(context->pixelShader) 410 { 411 if(draw->psDirtyConstF) 412 { 413 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8)); 414 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); 415 draw->psDirtyConstF = 0; 416 } 417 418 if(draw->psDirtyConstI) 419 { 420 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); 421 draw->psDirtyConstI = 0; 422 } 423 424 if(draw->psDirtyConstB) 425 { 426 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); 427 draw->psDirtyConstB = 0; 428 } 429 430 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); 431 } 432 else 433 { 434 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 435 { 436 draw->pUniformBuffers[i] = nullptr; 437 } 438 } 439 440 if(context->pixelShaderModel() <= 0x0104) 441 { 442 for(int stage = 0; stage < 8; stage++) 443 { 444 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader) 445 { 446 data->textureStage[stage] = context->textureStage[stage].uniforms; 447 } 448 else break; 449 } 450 } 451 452 if(context->vertexShader) 453 { 454 if(context->vertexShader->getShaderModel() >= 0x0300) 455 { 456 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) 457 { 458 if(vertexState.sampler[sampler].textureType != TEXTURE_NULL) 459 { 460 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler]; 461 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); 462 463 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); 464 465 requiresSync |= context->sampler[TEXTURE_IMAGE_UNITS + sampler].requiresSync(); 466 } 467 } 468 } 469 470 if(draw->vsDirtyConstF) 471 { 472 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); 473 draw->vsDirtyConstF = 0; 474 } 475 476 if(draw->vsDirtyConstI) 477 { 478 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); 479 draw->vsDirtyConstI = 0; 480 } 481 482 if(draw->vsDirtyConstB) 483 { 484 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); 485 draw->vsDirtyConstB = 0; 486 } 487 488 if(context->vertexShader->isInstanceIdDeclared()) 489 { 490 data->instanceID = context->instanceID; 491 } 492 493 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); 494 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); 495 } 496 else 497 { 498 data->ff = ff; 499 500 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 501 draw->vsDirtyConstI = 16; 502 draw->vsDirtyConstB = 16; 503 504 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 505 { 506 draw->vUniformBuffers[i] = nullptr; 507 } 508 509 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 510 { 511 draw->transformFeedbackBuffers[i] = nullptr; 512 } 513 } 514 515 if(pixelState.stencilActive) 516 { 517 data->stencil[0] = stencil; 518 data->stencil[1] = stencilCCW; 519 } 520 521 if(pixelState.fogActive) 522 { 523 data->fog = fog; 524 } 525 526 if(setupState.isDrawPoint) 527 { 528 data->point = point; 529 } 530 531 data->lineWidth = context->lineWidth; 532 533 data->factor = factor; 534 535 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 536 { 537 float ref = context->alphaReference * (1.0f / 255.0f); 538 float margin = sw::min(ref, 1.0f - ref); 539 540 if(ms == 4) 541 { 542 data->a2c0 = replicate(ref - margin * 0.6f); 543 data->a2c1 = replicate(ref - margin * 0.2f); 544 data->a2c2 = replicate(ref + margin * 0.2f); 545 data->a2c3 = replicate(ref + margin * 0.6f); 546 } 547 else if(ms == 2) 548 { 549 data->a2c0 = replicate(ref - margin * 0.3f); 550 data->a2c1 = replicate(ref + margin * 0.3f); 551 } 552 else ASSERT(false); 553 } 554 555 if(pixelState.occlusionEnabled) 556 { 557 for(int cluster = 0; cluster < clusterCount; cluster++) 558 { 559 data->occlusion[cluster] = 0; 560 } 561 } 562 563 #if PERF_PROFILE 564 for(int cluster = 0; cluster < clusterCount; cluster++) 565 { 566 for(int i = 0; i < PERF_TIMERS; i++) 567 { 568 data->cycles[i][cluster] = 0; 569 } 570 } 571 #endif 572 573 // Viewport 574 { 575 float W = 0.5f * viewport.width; 576 float H = 0.5f * viewport.height; 577 float X0 = viewport.x0 + W; 578 float Y0 = viewport.y0 + H; 579 float N = viewport.minZ; 580 float F = viewport.maxZ; 581 float Z = F - N; 582 583 if(context->isDrawTriangle(false)) 584 { 585 N += context->depthBias; 586 } 587 588 if(complementaryDepthBuffer) 589 { 590 Z = -Z; 591 N = 1 - N; 592 } 593 594 static const float X[5][16] = // Fragment offsets 595 { 596 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 597 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 598 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 599 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 600 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples 601 }; 602 603 static const float Y[5][16] = // Fragment offsets 604 { 605 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 606 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 607 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 608 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 609 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples 610 }; 611 612 int s = sw::log2(ss); 613 614 data->Wx16 = replicate(W * 16); 615 data->Hx16 = replicate(H * 16); 616 data->X0x16 = replicate(X0 * 16 - 8); 617 data->Y0x16 = replicate(Y0 * 16 - 8); 618 data->XXXX = replicate(X[s][q] / W); 619 data->YYYY = replicate(Y[s][q] / H); 620 data->halfPixelX = replicate(0.5f / W); 621 data->halfPixelY = replicate(0.5f / H); 622 data->viewportHeight = abs(viewport.height); 623 data->slopeDepthBias = context->slopeDepthBias; 624 data->depthRange = Z; 625 data->depthNear = N; 626 draw->clipFlags = clipFlags; 627 628 if(clipFlags) 629 { 630 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; 631 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; 632 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; 633 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; 634 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; 635 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; 636 } 637 } 638 639 // Target 640 { 641 for(int index = 0; index < RENDERTARGETS; index++) 642 { 643 draw->renderTarget[index] = context->renderTarget[index]; 644 645 if(draw->renderTarget[index]) 646 { 647 unsigned int layer = context->renderTargetLayer[index]; 648 requiresSync |= context->renderTarget[index]->requiresSync(); 649 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); 650 data->colorBuffer[index] += q * ms * context->renderTarget[index]->getSliceB(true); 651 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); 652 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); 653 } 654 } 655 656 draw->depthBuffer = context->depthBuffer; 657 draw->stencilBuffer = context->stencilBuffer; 658 659 if(draw->depthBuffer) 660 { 661 unsigned int layer = context->depthBufferLayer; 662 requiresSync |= context->depthBuffer->requiresSync(); 663 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, layer, LOCK_READWRITE, MANAGED); 664 data->depthBuffer += q * ms * context->depthBuffer->getSliceB(true); 665 data->depthPitchB = context->depthBuffer->getInternalPitchB(); 666 data->depthSliceB = context->depthBuffer->getInternalSliceB(); 667 } 668 669 if(draw->stencilBuffer) 670 { 671 unsigned int layer = context->stencilBufferLayer; 672 requiresSync |= context->stencilBuffer->requiresSync(); 673 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, layer, MANAGED); 674 data->stencilBuffer += q * ms * context->stencilBuffer->getSliceB(true); 675 data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); 676 data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); 677 } 678 } 679 680 // Scissor 681 { 682 data->scissorX0 = scissor.x0; 683 data->scissorX1 = scissor.x1; 684 data->scissorY0 = scissor.y0; 685 data->scissorY1 = scissor.y1; 686 } 687 688 draw->primitive = 0; 689 draw->count = count; 690 691 draw->references = (count + batch - 1) / batch; 692 693 schedulerMutex.lock(); 694 ++nextDraw; // Atomic 695 schedulerMutex.unlock(); 696 697 #ifndef NDEBUG 698 if(threadCount == 1) // Use main thread for draw execution 699 { 700 threadsAwake = 1; 701 task[0].type = Task::RESUME; 702 703 taskLoop(0); 704 } 705 else 706 #endif 707 { 708 if(!threadsAwake) 709 { 710 suspend[0]->wait(); 711 712 threadsAwake = 1; 713 task[0].type = Task::RESUME; 714 715 resume[0]->signal(); 716 } 717 } 718 } 719 720 // TODO(sugoi): This is a temporary brute-force workaround to ensure IOSurface synchronization. 721 if(requiresSync) 722 { 723 synchronize(); 724 } 725 } 726 clear(void * value,Format format,Surface * dest,const Rect & clearRect,unsigned int rgbaMask)727 void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask) 728 { 729 blitter->clear(value, format, dest, clearRect, rgbaMask); 730 } 731 blit(Surface * source,const SliceRectF & sRect,Surface * dest,const SliceRect & dRect,bool filter,bool isStencil,bool sRGBconversion)732 void Renderer::blit(Surface *source, const SliceRectF &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil, bool sRGBconversion) 733 { 734 blitter->blit(source, sRect, dest, dRect, {filter, isStencil, sRGBconversion}); 735 } 736 blit3D(Surface * source,Surface * dest)737 void Renderer::blit3D(Surface *source, Surface *dest) 738 { 739 blitter->blit3D(source, dest); 740 } 741 threadFunction(void * parameters)742 void Renderer::threadFunction(void *parameters) 743 { 744 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer; 745 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex; 746 747 if(logPrecision < IEEE) 748 { 749 CPUID::setFlushToZero(true); 750 CPUID::setDenormalsAreZero(true); 751 } 752 753 renderer->threadLoop(threadIndex); 754 } 755 threadLoop(int threadIndex)756 void Renderer::threadLoop(int threadIndex) 757 { 758 while(!exitThreads) 759 { 760 taskLoop(threadIndex); 761 762 suspend[threadIndex]->signal(); 763 resume[threadIndex]->wait(); 764 } 765 } 766 taskLoop(int threadIndex)767 void Renderer::taskLoop(int threadIndex) 768 { 769 while(task[threadIndex].type != Task::SUSPEND) 770 { 771 scheduleTask(threadIndex); 772 executeTask(threadIndex); 773 } 774 } 775 findAvailableTasks()776 void Renderer::findAvailableTasks() 777 { 778 // Find pixel tasks 779 for(int cluster = 0; cluster < clusterCount; cluster++) 780 { 781 if(!pixelProgress[cluster].executing) 782 { 783 for(int unit = 0; unit < unitCount; unit++) 784 { 785 if(primitiveProgress[unit].references > 0) // Contains processed primitives 786 { 787 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall) 788 { 789 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered 790 { 791 Task &task = taskQueue[qHead]; 792 task.type = Task::PIXELS; 793 task.primitiveUnit = unit; 794 task.pixelCluster = cluster; 795 796 pixelProgress[cluster].executing = true; 797 798 // Commit to the task queue 799 qHead = (qHead + 1) & TASK_COUNT_BITS; 800 qSize++; 801 802 break; 803 } 804 } 805 } 806 } 807 } 808 } 809 810 // Find primitive tasks 811 if(currentDraw == nextDraw) 812 { 813 return; // No more primitives to process 814 } 815 816 for(int unit = 0; unit < unitCount; unit++) 817 { 818 DrawCall *draw = drawList[currentDraw & DRAW_COUNT_BITS]; 819 820 int primitive = draw->primitive; 821 int count = draw->count; 822 823 if(primitive >= count) 824 { 825 ++currentDraw; // Atomic 826 827 if(currentDraw == nextDraw) 828 { 829 return; // No more primitives to process 830 } 831 832 draw = drawList[currentDraw & DRAW_COUNT_BITS]; 833 } 834 835 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit 836 { 837 primitive = draw->primitive; 838 count = draw->count; 839 int batch = draw->batchSize; 840 841 primitiveProgress[unit].drawCall = currentDraw; 842 primitiveProgress[unit].firstPrimitive = primitive; 843 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive; 844 845 draw->primitive += batch; 846 847 Task &task = taskQueue[qHead]; 848 task.type = Task::PRIMITIVES; 849 task.primitiveUnit = unit; 850 851 primitiveProgress[unit].references = -1; 852 853 // Commit to the task queue 854 qHead = (qHead + 1) & TASK_COUNT_BITS; 855 qSize++; 856 } 857 } 858 } 859 scheduleTask(int threadIndex)860 void Renderer::scheduleTask(int threadIndex) 861 { 862 schedulerMutex.lock(); 863 864 int curThreadsAwake = threadsAwake; 865 866 if((int)qSize < threadCount - curThreadsAwake + 1) 867 { 868 findAvailableTasks(); 869 } 870 871 if(qSize != 0) 872 { 873 task[threadIndex] = taskQueue[(qHead - qSize) & TASK_COUNT_BITS]; 874 qSize--; 875 876 if(curThreadsAwake != threadCount) 877 { 878 int wakeup = qSize - curThreadsAwake + 1; 879 880 for(int i = 0; i < threadCount && wakeup > 0; i++) 881 { 882 if(task[i].type == Task::SUSPEND) 883 { 884 suspend[i]->wait(); 885 task[i].type = Task::RESUME; 886 resume[i]->signal(); 887 888 ++threadsAwake; // Atomic 889 wakeup--; 890 } 891 } 892 } 893 } 894 else 895 { 896 task[threadIndex].type = Task::SUSPEND; 897 898 --threadsAwake; // Atomic 899 } 900 901 schedulerMutex.unlock(); 902 } 903 executeTask(int threadIndex)904 void Renderer::executeTask(int threadIndex) 905 { 906 #if PERF_HUD 907 int64_t startTick = Timer::ticks(); 908 #endif 909 910 switch(task[threadIndex].type) 911 { 912 case Task::PRIMITIVES: 913 { 914 int unit = task[threadIndex].primitiveUnit; 915 916 int input = primitiveProgress[unit].firstPrimitive; 917 int count = primitiveProgress[unit].primitiveCount; 918 DrawCall *draw = drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 919 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives; 920 921 processPrimitiveVertices(unit, input, count, draw->count, threadIndex); 922 923 #if PERF_HUD 924 int64_t time = Timer::ticks(); 925 vertexTime[threadIndex] += time - startTick; 926 startTick = time; 927 #endif 928 929 int visible = 0; 930 931 if(!draw->setupState.rasterizerDiscard) 932 { 933 visible = (this->*setupPrimitives)(unit, count); 934 } 935 936 primitiveProgress[unit].visible = visible; 937 primitiveProgress[unit].references = clusterCount; 938 939 #if PERF_HUD 940 setupTime[threadIndex] += Timer::ticks() - startTick; 941 #endif 942 } 943 break; 944 case Task::PIXELS: 945 { 946 int unit = task[threadIndex].primitiveUnit; 947 int visible = primitiveProgress[unit].visible; 948 949 if(visible > 0) 950 { 951 int cluster = task[threadIndex].pixelCluster; 952 Primitive *primitive = primitiveBatch[unit]; 953 DrawCall *draw = drawList[pixelProgress[cluster].drawCall & DRAW_COUNT_BITS]; 954 DrawData *data = draw->data; 955 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer; 956 957 pixelRoutine(primitive, visible, cluster, data); 958 } 959 960 finishRendering(task[threadIndex]); 961 962 #if PERF_HUD 963 pixelTime[threadIndex] += Timer::ticks() - startTick; 964 #endif 965 } 966 break; 967 case Task::RESUME: 968 break; 969 case Task::SUSPEND: 970 break; 971 default: 972 ASSERT(false); 973 } 974 } 975 synchronize()976 void Renderer::synchronize() 977 { 978 sync->lock(sw::PUBLIC); 979 sync->unlock(); 980 } 981 finishRendering(Task & pixelTask)982 void Renderer::finishRendering(Task &pixelTask) 983 { 984 int unit = pixelTask.primitiveUnit; 985 int cluster = pixelTask.pixelCluster; 986 987 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 988 DrawData &data = *draw.data; 989 int primitive = primitiveProgress[unit].firstPrimitive; 990 int count = primitiveProgress[unit].primitiveCount; 991 int processedPrimitives = primitive + count; 992 993 pixelProgress[cluster].processedPrimitives = processedPrimitives; 994 995 if(pixelProgress[cluster].processedPrimitives >= draw.count) 996 { 997 ++pixelProgress[cluster].drawCall; // Atomic 998 pixelProgress[cluster].processedPrimitives = 0; 999 } 1000 1001 int ref = primitiveProgress[unit].references--; // Atomic 1002 1003 if(ref == 0) 1004 { 1005 ref = draw.references--; // Atomic 1006 1007 if(ref == 0) 1008 { 1009 #if PERF_PROFILE 1010 for(int cluster = 0; cluster < clusterCount; cluster++) 1011 { 1012 for(int i = 0; i < PERF_TIMERS; i++) 1013 { 1014 profiler.cycles[i] += data.cycles[i][cluster]; 1015 } 1016 } 1017 #endif 1018 1019 if(draw.queries) 1020 { 1021 for(auto &query : *(draw.queries)) 1022 { 1023 switch(query->type) 1024 { 1025 case Query::FRAGMENTS_PASSED: 1026 for(int cluster = 0; cluster < clusterCount; cluster++) 1027 { 1028 query->data += data.occlusion[cluster]; 1029 } 1030 break; 1031 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: 1032 query->data += processedPrimitives; 1033 break; 1034 default: 1035 break; 1036 } 1037 1038 query->release(); 1039 } 1040 1041 delete draw.queries; 1042 draw.queries = 0; 1043 } 1044 1045 for(int i = 0; i < RENDERTARGETS; i++) 1046 { 1047 if(draw.renderTarget[i]) 1048 { 1049 draw.renderTarget[i]->unlockInternal(); 1050 } 1051 } 1052 1053 if(draw.depthBuffer) 1054 { 1055 draw.depthBuffer->unlockInternal(); 1056 } 1057 1058 if(draw.stencilBuffer) 1059 { 1060 draw.stencilBuffer->unlockStencil(); 1061 } 1062 1063 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++) 1064 { 1065 if(draw.texture[i]) 1066 { 1067 draw.texture[i]->unlock(); 1068 } 1069 } 1070 1071 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 1072 { 1073 if(draw.vertexStream[i]) 1074 { 1075 draw.vertexStream[i]->unlock(); 1076 } 1077 } 1078 1079 if(draw.indexBuffer) 1080 { 1081 draw.indexBuffer->unlock(); 1082 } 1083 1084 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 1085 { 1086 if(draw.pUniformBuffers[i]) 1087 { 1088 draw.pUniformBuffers[i]->unlock(); 1089 } 1090 if(draw.vUniformBuffers[i]) 1091 { 1092 draw.vUniformBuffers[i]->unlock(); 1093 } 1094 } 1095 1096 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 1097 { 1098 if(draw.transformFeedbackBuffers[i]) 1099 { 1100 draw.transformFeedbackBuffers[i]->unlock(); 1101 } 1102 } 1103 1104 draw.vertexRoutine.reset(); 1105 draw.setupRoutine.reset(); 1106 draw.pixelRoutine.reset(); 1107 1108 sync->unlock(); 1109 1110 draw.references = -1; 1111 resumeApp->signal(); 1112 } 1113 } 1114 1115 pixelProgress[cluster].executing = false; 1116 } 1117 processPrimitiveVertices(int unit,unsigned int start,unsigned int triangleCount,unsigned int loop,int thread)1118 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) 1119 { 1120 Triangle *triangle = triangleBatch[unit]; 1121 int primitiveDrawCall = primitiveProgress[unit].drawCall; 1122 DrawCall *draw = drawList[primitiveDrawCall & DRAW_COUNT_BITS]; 1123 DrawData *data = draw->data; 1124 VertexTask *task = vertexTask[thread]; 1125 1126 const void *indices = data->indices; 1127 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; 1128 1129 if(task->vertexCache.drawCall != primitiveDrawCall) 1130 { 1131 task->vertexCache.clear(); 1132 task->vertexCache.drawCall = primitiveDrawCall; 1133 } 1134 1135 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size 1136 1137 switch(draw->drawType) 1138 { 1139 case DRAW_POINTLIST: 1140 { 1141 unsigned int index = start; 1142 1143 for(unsigned int i = 0; i < triangleCount; i++) 1144 { 1145 batch[i][0] = index; 1146 batch[i][1] = index; 1147 batch[i][2] = index; 1148 1149 index += 1; 1150 } 1151 } 1152 break; 1153 case DRAW_LINELIST: 1154 { 1155 unsigned int index = 2 * start; 1156 1157 for(unsigned int i = 0; i < triangleCount; i++) 1158 { 1159 batch[i][0] = index + 0; 1160 batch[i][1] = index + 1; 1161 batch[i][2] = index + 1; 1162 1163 index += 2; 1164 } 1165 } 1166 break; 1167 case DRAW_LINESTRIP: 1168 { 1169 unsigned int index = start; 1170 1171 for(unsigned int i = 0; i < triangleCount; i++) 1172 { 1173 batch[i][0] = index + 0; 1174 batch[i][1] = index + 1; 1175 batch[i][2] = index + 1; 1176 1177 index += 1; 1178 } 1179 } 1180 break; 1181 case DRAW_LINELOOP: 1182 { 1183 unsigned int index = start; 1184 1185 for(unsigned int i = 0; i < triangleCount; i++) 1186 { 1187 batch[i][0] = (index + 0) % loop; 1188 batch[i][1] = (index + 1) % loop; 1189 batch[i][2] = (index + 1) % loop; 1190 1191 index += 1; 1192 } 1193 } 1194 break; 1195 case DRAW_TRIANGLELIST: 1196 { 1197 unsigned int index = 3 * start; 1198 1199 for(unsigned int i = 0; i < triangleCount; i++) 1200 { 1201 batch[i][0] = index + 0; 1202 batch[i][1] = index + 1; 1203 batch[i][2] = index + 2; 1204 1205 index += 3; 1206 } 1207 } 1208 break; 1209 case DRAW_TRIANGLESTRIP: 1210 { 1211 unsigned int index = start; 1212 1213 for(unsigned int i = 0; i < triangleCount; i++) 1214 { 1215 if(leadingVertexFirst) 1216 { 1217 batch[i][0] = index + 0; 1218 batch[i][1] = index + (index & 1) + 1; 1219 batch[i][2] = index + (~index & 1) + 1; 1220 } 1221 else 1222 { 1223 batch[i][0] = index + (index & 1); 1224 batch[i][1] = index + (~index & 1); 1225 batch[i][2] = index + 2; 1226 } 1227 1228 index += 1; 1229 } 1230 } 1231 break; 1232 case DRAW_TRIANGLEFAN: 1233 { 1234 unsigned int index = start; 1235 1236 for(unsigned int i = 0; i < triangleCount; i++) 1237 { 1238 if(leadingVertexFirst) 1239 { 1240 batch[i][0] = index + 1; 1241 batch[i][1] = index + 2; 1242 batch[i][2] = 0; 1243 } 1244 else 1245 { 1246 batch[i][0] = 0; 1247 batch[i][1] = index + 1; 1248 batch[i][2] = index + 2; 1249 } 1250 1251 index += 1; 1252 } 1253 } 1254 break; 1255 case DRAW_INDEXEDPOINTLIST8: 1256 { 1257 const unsigned char *index = (const unsigned char*)indices + start; 1258 1259 for(unsigned int i = 0; i < triangleCount; i++) 1260 { 1261 batch[i][0] = *index; 1262 batch[i][1] = *index; 1263 batch[i][2] = *index; 1264 1265 index += 1; 1266 } 1267 } 1268 break; 1269 case DRAW_INDEXEDPOINTLIST16: 1270 { 1271 const unsigned short *index = (const unsigned short*)indices + start; 1272 1273 for(unsigned int i = 0; i < triangleCount; i++) 1274 { 1275 batch[i][0] = *index; 1276 batch[i][1] = *index; 1277 batch[i][2] = *index; 1278 1279 index += 1; 1280 } 1281 } 1282 break; 1283 case DRAW_INDEXEDPOINTLIST32: 1284 { 1285 const unsigned int *index = (const unsigned int*)indices + start; 1286 1287 for(unsigned int i = 0; i < triangleCount; i++) 1288 { 1289 batch[i][0] = *index; 1290 batch[i][1] = *index; 1291 batch[i][2] = *index; 1292 1293 index += 1; 1294 } 1295 } 1296 break; 1297 case DRAW_INDEXEDLINELIST8: 1298 { 1299 const unsigned char *index = (const unsigned char*)indices + 2 * start; 1300 1301 for(unsigned int i = 0; i < triangleCount; i++) 1302 { 1303 batch[i][0] = index[0]; 1304 batch[i][1] = index[1]; 1305 batch[i][2] = index[1]; 1306 1307 index += 2; 1308 } 1309 } 1310 break; 1311 case DRAW_INDEXEDLINELIST16: 1312 { 1313 const unsigned short *index = (const unsigned short*)indices + 2 * start; 1314 1315 for(unsigned int i = 0; i < triangleCount; i++) 1316 { 1317 batch[i][0] = index[0]; 1318 batch[i][1] = index[1]; 1319 batch[i][2] = index[1]; 1320 1321 index += 2; 1322 } 1323 } 1324 break; 1325 case DRAW_INDEXEDLINELIST32: 1326 { 1327 const unsigned int *index = (const unsigned int*)indices + 2 * start; 1328 1329 for(unsigned int i = 0; i < triangleCount; i++) 1330 { 1331 batch[i][0] = index[0]; 1332 batch[i][1] = index[1]; 1333 batch[i][2] = index[1]; 1334 1335 index += 2; 1336 } 1337 } 1338 break; 1339 case DRAW_INDEXEDLINESTRIP8: 1340 { 1341 const unsigned char *index = (const unsigned char*)indices + start; 1342 1343 for(unsigned int i = 0; i < triangleCount; i++) 1344 { 1345 batch[i][0] = index[0]; 1346 batch[i][1] = index[1]; 1347 batch[i][2] = index[1]; 1348 1349 index += 1; 1350 } 1351 } 1352 break; 1353 case DRAW_INDEXEDLINESTRIP16: 1354 { 1355 const unsigned short *index = (const unsigned short*)indices + start; 1356 1357 for(unsigned int i = 0; i < triangleCount; i++) 1358 { 1359 batch[i][0] = index[0]; 1360 batch[i][1] = index[1]; 1361 batch[i][2] = index[1]; 1362 1363 index += 1; 1364 } 1365 } 1366 break; 1367 case DRAW_INDEXEDLINESTRIP32: 1368 { 1369 const unsigned int *index = (const unsigned int*)indices + start; 1370 1371 for(unsigned int i = 0; i < triangleCount; i++) 1372 { 1373 batch[i][0] = index[0]; 1374 batch[i][1] = index[1]; 1375 batch[i][2] = index[1]; 1376 1377 index += 1; 1378 } 1379 } 1380 break; 1381 case DRAW_INDEXEDLINELOOP8: 1382 { 1383 const unsigned char *index = (const unsigned char*)indices; 1384 1385 for(unsigned int i = 0; i < triangleCount; i++) 1386 { 1387 batch[i][0] = index[(start + i + 0) % loop]; 1388 batch[i][1] = index[(start + i + 1) % loop]; 1389 batch[i][2] = index[(start + i + 1) % loop]; 1390 } 1391 } 1392 break; 1393 case DRAW_INDEXEDLINELOOP16: 1394 { 1395 const unsigned short *index = (const unsigned short*)indices; 1396 1397 for(unsigned int i = 0; i < triangleCount; i++) 1398 { 1399 batch[i][0] = index[(start + i + 0) % loop]; 1400 batch[i][1] = index[(start + i + 1) % loop]; 1401 batch[i][2] = index[(start + i + 1) % loop]; 1402 } 1403 } 1404 break; 1405 case DRAW_INDEXEDLINELOOP32: 1406 { 1407 const unsigned int *index = (const unsigned int*)indices; 1408 1409 for(unsigned int i = 0; i < triangleCount; i++) 1410 { 1411 batch[i][0] = index[(start + i + 0) % loop]; 1412 batch[i][1] = index[(start + i + 1) % loop]; 1413 batch[i][2] = index[(start + i + 1) % loop]; 1414 } 1415 } 1416 break; 1417 case DRAW_INDEXEDTRIANGLELIST8: 1418 { 1419 const unsigned char *index = (const unsigned char*)indices + 3 * start; 1420 1421 for(unsigned int i = 0; i < triangleCount; i++) 1422 { 1423 batch[i][0] = index[0]; 1424 batch[i][1] = index[1]; 1425 batch[i][2] = index[2]; 1426 1427 index += 3; 1428 } 1429 } 1430 break; 1431 case DRAW_INDEXEDTRIANGLELIST16: 1432 { 1433 const unsigned short *index = (const unsigned short*)indices + 3 * start; 1434 1435 for(unsigned int i = 0; i < triangleCount; i++) 1436 { 1437 batch[i][0] = index[0]; 1438 batch[i][1] = index[1]; 1439 batch[i][2] = index[2]; 1440 1441 index += 3; 1442 } 1443 } 1444 break; 1445 case DRAW_INDEXEDTRIANGLELIST32: 1446 { 1447 const unsigned int *index = (const unsigned int*)indices + 3 * start; 1448 1449 for(unsigned int i = 0; i < triangleCount; i++) 1450 { 1451 batch[i][0] = index[0]; 1452 batch[i][1] = index[1]; 1453 batch[i][2] = index[2]; 1454 1455 index += 3; 1456 } 1457 } 1458 break; 1459 case DRAW_INDEXEDTRIANGLESTRIP8: 1460 { 1461 const unsigned char *index = (const unsigned char*)indices + start; 1462 1463 for(unsigned int i = 0; i < triangleCount; i++) 1464 { 1465 batch[i][0] = index[0]; 1466 batch[i][1] = index[((start + i) & 1) + 1]; 1467 batch[i][2] = index[(~(start + i) & 1) + 1]; 1468 1469 index += 1; 1470 } 1471 } 1472 break; 1473 case DRAW_INDEXEDTRIANGLESTRIP16: 1474 { 1475 const unsigned short *index = (const unsigned short*)indices + start; 1476 1477 for(unsigned int i = 0; i < triangleCount; i++) 1478 { 1479 batch[i][0] = index[0]; 1480 batch[i][1] = index[((start + i) & 1) + 1]; 1481 batch[i][2] = index[(~(start + i) & 1) + 1]; 1482 1483 index += 1; 1484 } 1485 } 1486 break; 1487 case DRAW_INDEXEDTRIANGLESTRIP32: 1488 { 1489 const unsigned int *index = (const unsigned int*)indices + start; 1490 1491 for(unsigned int i = 0; i < triangleCount; i++) 1492 { 1493 batch[i][0] = index[0]; 1494 batch[i][1] = index[((start + i) & 1) + 1]; 1495 batch[i][2] = index[(~(start + i) & 1) + 1]; 1496 1497 index += 1; 1498 } 1499 } 1500 break; 1501 case DRAW_INDEXEDTRIANGLEFAN8: 1502 { 1503 const unsigned char *index = (const unsigned char*)indices; 1504 1505 for(unsigned int i = 0; i < triangleCount; i++) 1506 { 1507 batch[i][0] = index[start + i + 1]; 1508 batch[i][1] = index[start + i + 2]; 1509 batch[i][2] = index[0]; 1510 } 1511 } 1512 break; 1513 case DRAW_INDEXEDTRIANGLEFAN16: 1514 { 1515 const unsigned short *index = (const unsigned short*)indices; 1516 1517 for(unsigned int i = 0; i < triangleCount; i++) 1518 { 1519 batch[i][0] = index[start + i + 1]; 1520 batch[i][1] = index[start + i + 2]; 1521 batch[i][2] = index[0]; 1522 } 1523 } 1524 break; 1525 case DRAW_INDEXEDTRIANGLEFAN32: 1526 { 1527 const unsigned int *index = (const unsigned int*)indices; 1528 1529 for(unsigned int i = 0; i < triangleCount; i++) 1530 { 1531 batch[i][0] = index[start + i + 1]; 1532 batch[i][1] = index[start + i + 2]; 1533 batch[i][2] = index[0]; 1534 } 1535 } 1536 break; 1537 case DRAW_QUADLIST: 1538 { 1539 unsigned int index = 4 * start / 2; 1540 1541 for(unsigned int i = 0; i < triangleCount; i += 2) 1542 { 1543 batch[i+0][0] = index + 0; 1544 batch[i+0][1] = index + 1; 1545 batch[i+0][2] = index + 2; 1546 1547 batch[i+1][0] = index + 0; 1548 batch[i+1][1] = index + 2; 1549 batch[i+1][2] = index + 3; 1550 1551 index += 4; 1552 } 1553 } 1554 break; 1555 default: 1556 ASSERT(false); 1557 return; 1558 } 1559 1560 task->primitiveStart = start; 1561 task->vertexCount = triangleCount * 3; 1562 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data); 1563 } 1564 setupSolidTriangles(int unit,int count)1565 int Renderer::setupSolidTriangles(int unit, int count) 1566 { 1567 Triangle *triangle = triangleBatch[unit]; 1568 Primitive *primitive = primitiveBatch[unit]; 1569 1570 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1571 SetupProcessor::State &state = draw.setupState; 1572 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1573 1574 int ms = state.multiSample; 1575 int pos = state.positionRegister; 1576 const DrawData *data = draw.data; 1577 int visible = 0; 1578 1579 for(int i = 0; i < count; i++, triangle++) 1580 { 1581 Vertex &v0 = triangle->v0; 1582 Vertex &v1 = triangle->v1; 1583 Vertex &v2 = triangle->v2; 1584 1585 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) 1586 { 1587 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); 1588 1589 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; 1590 1591 if(clipFlagsOr != Clipper::CLIP_FINITE) 1592 { 1593 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1594 { 1595 continue; 1596 } 1597 } 1598 1599 if(setupRoutine(primitive, triangle, &polygon, data)) 1600 { 1601 primitive += ms; 1602 visible++; 1603 } 1604 } 1605 } 1606 1607 return visible; 1608 } 1609 setupWireframeTriangle(int unit,int count)1610 int Renderer::setupWireframeTriangle(int unit, int count) 1611 { 1612 Triangle *triangle = triangleBatch[unit]; 1613 Primitive *primitive = primitiveBatch[unit]; 1614 int visible = 0; 1615 1616 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1617 SetupProcessor::State &state = draw.setupState; 1618 1619 const Vertex &v0 = triangle[0].v0; 1620 const Vertex &v1 = triangle[0].v1; 1621 const Vertex &v2 = triangle[0].v2; 1622 1623 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; 1624 1625 if(state.cullMode == CULL_CLOCKWISE) 1626 { 1627 if(d >= 0) return 0; 1628 } 1629 else if(state.cullMode == CULL_COUNTERCLOCKWISE) 1630 { 1631 if(d <= 0) return 0; 1632 } 1633 1634 // Copy attributes 1635 triangle[1].v0 = v1; 1636 triangle[1].v1 = v2; 1637 triangle[2].v0 = v2; 1638 triangle[2].v1 = v0; 1639 1640 if(state.color[0][0].flat) // FIXME 1641 { 1642 for(int i = 0; i < 2; i++) 1643 { 1644 triangle[1].v0.C[i] = triangle[0].v0.C[i]; 1645 triangle[1].v1.C[i] = triangle[0].v0.C[i]; 1646 triangle[2].v0.C[i] = triangle[0].v0.C[i]; 1647 triangle[2].v1.C[i] = triangle[0].v0.C[i]; 1648 } 1649 } 1650 1651 for(int i = 0; i < 3; i++) 1652 { 1653 if(setupLine(*primitive, *triangle, draw)) 1654 { 1655 primitive->area = 0.5f * d; 1656 1657 primitive++; 1658 visible++; 1659 } 1660 1661 triangle++; 1662 } 1663 1664 return visible; 1665 } 1666 setupVertexTriangle(int unit,int count)1667 int Renderer::setupVertexTriangle(int unit, int count) 1668 { 1669 Triangle *triangle = triangleBatch[unit]; 1670 Primitive *primitive = primitiveBatch[unit]; 1671 int visible = 0; 1672 1673 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1674 SetupProcessor::State &state = draw.setupState; 1675 1676 const Vertex &v0 = triangle[0].v0; 1677 const Vertex &v1 = triangle[0].v1; 1678 const Vertex &v2 = triangle[0].v2; 1679 1680 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; 1681 1682 if(state.cullMode == CULL_CLOCKWISE) 1683 { 1684 if(d >= 0) return 0; 1685 } 1686 else if(state.cullMode == CULL_COUNTERCLOCKWISE) 1687 { 1688 if(d <= 0) return 0; 1689 } 1690 1691 // Copy attributes 1692 triangle[1].v0 = v1; 1693 triangle[2].v0 = v2; 1694 1695 for(int i = 0; i < 3; i++) 1696 { 1697 if(setupPoint(*primitive, *triangle, draw)) 1698 { 1699 primitive->area = 0.5f * d; 1700 1701 primitive++; 1702 visible++; 1703 } 1704 1705 triangle++; 1706 } 1707 1708 return visible; 1709 } 1710 setupLines(int unit,int count)1711 int Renderer::setupLines(int unit, int count) 1712 { 1713 Triangle *triangle = triangleBatch[unit]; 1714 Primitive *primitive = primitiveBatch[unit]; 1715 int visible = 0; 1716 1717 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1718 SetupProcessor::State &state = draw.setupState; 1719 1720 int ms = state.multiSample; 1721 1722 for(int i = 0; i < count; i++) 1723 { 1724 if(setupLine(*primitive, *triangle, draw)) 1725 { 1726 primitive += ms; 1727 visible++; 1728 } 1729 1730 triangle++; 1731 } 1732 1733 return visible; 1734 } 1735 setupPoints(int unit,int count)1736 int Renderer::setupPoints(int unit, int count) 1737 { 1738 Triangle *triangle = triangleBatch[unit]; 1739 Primitive *primitive = primitiveBatch[unit]; 1740 int visible = 0; 1741 1742 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall & DRAW_COUNT_BITS]; 1743 SetupProcessor::State &state = draw.setupState; 1744 1745 int ms = state.multiSample; 1746 1747 for(int i = 0; i < count; i++) 1748 { 1749 if(setupPoint(*primitive, *triangle, draw)) 1750 { 1751 primitive += ms; 1752 visible++; 1753 } 1754 1755 triangle++; 1756 } 1757 1758 return visible; 1759 } 1760 setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1761 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1762 { 1763 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1764 const SetupProcessor::State &state = draw.setupState; 1765 const DrawData &data = *draw.data; 1766 1767 float lineWidth = data.lineWidth; 1768 1769 Vertex &v0 = triangle.v0; 1770 Vertex &v1 = triangle.v1; 1771 1772 int pos = state.positionRegister; 1773 1774 const float4 &P0 = v0.v[pos]; 1775 const float4 &P1 = v1.v[pos]; 1776 1777 if(P0.w <= 0 && P1.w <= 0) 1778 { 1779 return false; 1780 } 1781 1782 const float W = data.Wx16[0] * (1.0f / 16.0f); 1783 const float H = data.Hx16[0] * (1.0f / 16.0f); 1784 1785 float dx = W * (P1.x / P1.w - P0.x / P0.w); 1786 float dy = H * (P1.y / P1.w - P0.y / P0.w); 1787 1788 if(dx == 0 && dy == 0) 1789 { 1790 return false; 1791 } 1792 1793 if(state.multiSample > 1) 1794 { 1795 // Rectangle centered on the line segment 1796 1797 float4 P[4]; 1798 int C[4]; 1799 1800 P[0] = P0; 1801 P[1] = P1; 1802 P[2] = P1; 1803 P[3] = P0; 1804 1805 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); 1806 1807 dx *= scale; 1808 dy *= scale; 1809 1810 float dx0h = dx * P0.w / H; 1811 float dy0w = dy * P0.w / W; 1812 1813 float dx1h = dx * P1.w / H; 1814 float dy1w = dy * P1.w / W; 1815 1816 P[0].x += -dy0w; 1817 P[0].y += +dx0h; 1818 C[0] = clipper->computeClipFlags(P[0]); 1819 1820 P[1].x += -dy1w; 1821 P[1].y += +dx1h; 1822 C[1] = clipper->computeClipFlags(P[1]); 1823 1824 P[2].x += +dy1w; 1825 P[2].y += -dx1h; 1826 C[2] = clipper->computeClipFlags(P[2]); 1827 1828 P[3].x += +dy0w; 1829 P[3].y += -dx0h; 1830 C[3] = clipper->computeClipFlags(P[3]); 1831 1832 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1833 { 1834 Polygon polygon(P, 4); 1835 1836 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1837 1838 if(clipFlagsOr != Clipper::CLIP_FINITE) 1839 { 1840 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1841 { 1842 return false; 1843 } 1844 } 1845 1846 return setupRoutine(&primitive, &triangle, &polygon, &data); 1847 } 1848 } 1849 else if(true) 1850 { 1851 // Connecting diamonds polygon 1852 // This shape satisfies the diamond test convention, except for the exit rule part. 1853 // Line segments with overlapping endpoints have duplicate fragments. 1854 // The ideal algorithm requires half-open line rasterization (b/80135519). 1855 1856 float4 P[8]; 1857 int C[8]; 1858 1859 P[0] = P0; 1860 P[1] = P0; 1861 P[2] = P0; 1862 P[3] = P0; 1863 P[4] = P1; 1864 P[5] = P1; 1865 P[6] = P1; 1866 P[7] = P1; 1867 1868 float dx0 = lineWidth * 0.5f * P0.w / W; 1869 float dy0 = lineWidth * 0.5f * P0.w / H; 1870 1871 float dx1 = lineWidth * 0.5f * P1.w / W; 1872 float dy1 = lineWidth * 0.5f * P1.w / H; 1873 1874 P[0].x += -dx0; 1875 C[0] = clipper->computeClipFlags(P[0]); 1876 1877 P[1].y += +dy0; 1878 C[1] = clipper->computeClipFlags(P[1]); 1879 1880 P[2].x += +dx0; 1881 C[2] = clipper->computeClipFlags(P[2]); 1882 1883 P[3].y += -dy0; 1884 C[3] = clipper->computeClipFlags(P[3]); 1885 1886 P[4].x += -dx1; 1887 C[4] = clipper->computeClipFlags(P[4]); 1888 1889 P[5].y += +dy1; 1890 C[5] = clipper->computeClipFlags(P[5]); 1891 1892 P[6].x += +dx1; 1893 C[6] = clipper->computeClipFlags(P[6]); 1894 1895 P[7].y += -dy1; 1896 C[7] = clipper->computeClipFlags(P[7]); 1897 1898 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) 1899 { 1900 float4 L[6]; 1901 1902 if(dx > -dy) 1903 { 1904 if(dx > dy) // Right 1905 { 1906 L[0] = P[0]; 1907 L[1] = P[1]; 1908 L[2] = P[5]; 1909 L[3] = P[6]; 1910 L[4] = P[7]; 1911 L[5] = P[3]; 1912 } 1913 else // Down 1914 { 1915 L[0] = P[0]; 1916 L[1] = P[4]; 1917 L[2] = P[5]; 1918 L[3] = P[6]; 1919 L[4] = P[2]; 1920 L[5] = P[3]; 1921 } 1922 } 1923 else 1924 { 1925 if(dx > dy) // Up 1926 { 1927 L[0] = P[0]; 1928 L[1] = P[1]; 1929 L[2] = P[2]; 1930 L[3] = P[6]; 1931 L[4] = P[7]; 1932 L[5] = P[4]; 1933 } 1934 else // Left 1935 { 1936 L[0] = P[1]; 1937 L[1] = P[2]; 1938 L[2] = P[3]; 1939 L[3] = P[7]; 1940 L[4] = P[4]; 1941 L[5] = P[5]; 1942 } 1943 } 1944 1945 Polygon polygon(L, 6); 1946 1947 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags; 1948 1949 if(clipFlagsOr != Clipper::CLIP_FINITE) 1950 { 1951 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1952 { 1953 return false; 1954 } 1955 } 1956 1957 return setupRoutine(&primitive, &triangle, &polygon, &data); 1958 } 1959 } 1960 else 1961 { 1962 // Parallelogram approximating Bresenham line 1963 // This algorithm does not satisfy the ideal diamond-exit rule, but does avoid the 1964 // duplicate fragment rasterization problem and satisfies all of Vulkan's minimum 1965 // requirements for Bresenham line segment rasterization. 1966 1967 float4 P[8]; 1968 P[0] = P0; 1969 P[1] = P0; 1970 P[2] = P0; 1971 P[3] = P0; 1972 P[4] = P1; 1973 P[5] = P1; 1974 P[6] = P1; 1975 P[7] = P1; 1976 1977 float dx0 = lineWidth * 0.5f * P0.w / W; 1978 float dy0 = lineWidth * 0.5f * P0.w / H; 1979 1980 float dx1 = lineWidth * 0.5f * P1.w / W; 1981 float dy1 = lineWidth * 0.5f * P1.w / H; 1982 1983 P[0].x += -dx0; 1984 P[1].y += +dy0; 1985 P[2].x += +dx0; 1986 P[3].y += -dy0; 1987 P[4].x += -dx1; 1988 P[5].y += +dy1; 1989 P[6].x += +dx1; 1990 P[7].y += -dy1; 1991 1992 float4 L[4]; 1993 1994 if(dx > -dy) 1995 { 1996 if(dx > dy) // Right 1997 { 1998 L[0] = P[1]; 1999 L[1] = P[5]; 2000 L[2] = P[7]; 2001 L[3] = P[3]; 2002 } 2003 else // Down 2004 { 2005 L[0] = P[0]; 2006 L[1] = P[4]; 2007 L[2] = P[6]; 2008 L[3] = P[2]; 2009 } 2010 } 2011 else 2012 { 2013 if(dx > dy) // Up 2014 { 2015 L[0] = P[0]; 2016 L[1] = P[2]; 2017 L[2] = P[6]; 2018 L[3] = P[4]; 2019 } 2020 else // Left 2021 { 2022 L[0] = P[1]; 2023 L[1] = P[3]; 2024 L[2] = P[7]; 2025 L[3] = P[5]; 2026 } 2027 } 2028 2029 int C0 = clipper->computeClipFlags(L[0]); 2030 int C1 = clipper->computeClipFlags(L[1]); 2031 int C2 = clipper->computeClipFlags(L[2]); 2032 int C3 = clipper->computeClipFlags(L[3]); 2033 2034 if((C0 & C1 & C2 & C3) == Clipper::CLIP_FINITE) 2035 { 2036 Polygon polygon(L, 4); 2037 2038 int clipFlagsOr = C0 | C1 | C2 | C3; 2039 2040 if(clipFlagsOr != Clipper::CLIP_FINITE) 2041 { 2042 if(!clipper->clip(polygon, clipFlagsOr, draw)) 2043 { 2044 return false; 2045 } 2046 } 2047 2048 return setupRoutine(&primitive, &triangle, &polygon, &data); 2049 } 2050 } 2051 2052 return false; 2053 } 2054 setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)2055 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 2056 { 2057 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 2058 const SetupProcessor::State &state = draw.setupState; 2059 const DrawData &data = *draw.data; 2060 2061 Vertex &v = triangle.v0; 2062 2063 float pSize; 2064 2065 int pts = state.pointSizeRegister; 2066 2067 if(state.pointSizeRegister != Unused) 2068 { 2069 pSize = v.v[pts].y; 2070 } 2071 else 2072 { 2073 pSize = data.point.pointSize[0]; 2074 } 2075 2076 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax); 2077 2078 float4 P[4]; 2079 int C[4]; 2080 2081 int pos = state.positionRegister; 2082 2083 P[0] = v.v[pos]; 2084 P[1] = v.v[pos]; 2085 P[2] = v.v[pos]; 2086 P[3] = v.v[pos]; 2087 2088 const float X = pSize * P[0].w * data.halfPixelX[0]; 2089 const float Y = pSize * P[0].w * data.halfPixelY[0]; 2090 2091 P[0].x -= X; 2092 P[0].y += Y; 2093 C[0] = clipper->computeClipFlags(P[0]); 2094 2095 P[1].x += X; 2096 P[1].y += Y; 2097 C[1] = clipper->computeClipFlags(P[1]); 2098 2099 P[2].x += X; 2100 P[2].y -= Y; 2101 C[2] = clipper->computeClipFlags(P[2]); 2102 2103 P[3].x -= X; 2104 P[3].y -= Y; 2105 C[3] = clipper->computeClipFlags(P[3]); 2106 2107 Polygon polygon(P, 4); 2108 2109 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 2110 { 2111 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 2112 2113 if(clipFlagsOr != Clipper::CLIP_FINITE) 2114 { 2115 if(!clipper->clip(polygon, clipFlagsOr, draw)) 2116 { 2117 return false; 2118 } 2119 } 2120 2121 triangle.v1 = triangle.v0; 2122 triangle.v2 = triangle.v0; 2123 2124 triangle.v1.X += iround(16 * 0.5f * pSize); 2125 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner 2126 return setupRoutine(&primitive, &triangle, &polygon, &data); 2127 } 2128 2129 return false; 2130 } 2131 initializeThreads()2132 void Renderer::initializeThreads() 2133 { 2134 unitCount = ceilPow2(threadCount); 2135 clusterCount = ceilPow2(threadCount); 2136 2137 for(int i = 0; i < unitCount; i++) 2138 { 2139 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); 2140 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); 2141 } 2142 2143 for(int i = 0; i < threadCount; i++) 2144 { 2145 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); 2146 vertexTask[i]->vertexCache.drawCall = -1; 2147 2148 task[i].type = Task::SUSPEND; 2149 2150 resume[i] = new Event(); 2151 suspend[i] = new Event(); 2152 2153 Parameters parameters; 2154 parameters.threadIndex = i; 2155 parameters.renderer = this; 2156 2157 exitThreads = false; 2158 worker[i] = new Thread(threadFunction, ¶meters); 2159 2160 suspend[i]->wait(); 2161 suspend[i]->signal(); 2162 } 2163 } 2164 terminateThreads()2165 void Renderer::terminateThreads() 2166 { 2167 while(threadsAwake != 0) 2168 { 2169 Thread::sleep(1); 2170 } 2171 2172 for(int thread = 0; thread < threadCount; thread++) 2173 { 2174 if(worker[thread]) 2175 { 2176 exitThreads = true; 2177 resume[thread]->signal(); 2178 worker[thread]->join(); 2179 2180 delete worker[thread]; 2181 worker[thread] = 0; 2182 delete resume[thread]; 2183 resume[thread] = 0; 2184 delete suspend[thread]; 2185 suspend[thread] = 0; 2186 } 2187 2188 deallocate(vertexTask[thread]); 2189 vertexTask[thread] = 0; 2190 } 2191 2192 for(int i = 0; i < 16; i++) 2193 { 2194 deallocate(triangleBatch[i]); 2195 triangleBatch[i] = 0; 2196 2197 deallocate(primitiveBatch[i]); 2198 primitiveBatch[i] = 0; 2199 } 2200 } 2201 loadConstants(const VertexShader * vertexShader)2202 void Renderer::loadConstants(const VertexShader *vertexShader) 2203 { 2204 if(!vertexShader) return; 2205 2206 size_t count = vertexShader->getLength(); 2207 2208 for(size_t i = 0; i < count; i++) 2209 { 2210 const Shader::Instruction *instruction = vertexShader->getInstruction(i); 2211 2212 if(instruction->opcode == Shader::OPCODE_DEF) 2213 { 2214 int index = instruction->dst.index; 2215 float value[4]; 2216 2217 value[0] = instruction->src[0].value[0]; 2218 value[1] = instruction->src[0].value[1]; 2219 value[2] = instruction->src[0].value[2]; 2220 value[3] = instruction->src[0].value[3]; 2221 2222 setVertexShaderConstantF(index, value); 2223 } 2224 else if(instruction->opcode == Shader::OPCODE_DEFI) 2225 { 2226 int index = instruction->dst.index; 2227 int integer[4]; 2228 2229 integer[0] = instruction->src[0].integer[0]; 2230 integer[1] = instruction->src[0].integer[1]; 2231 integer[2] = instruction->src[0].integer[2]; 2232 integer[3] = instruction->src[0].integer[3]; 2233 2234 setVertexShaderConstantI(index, integer); 2235 } 2236 else if(instruction->opcode == Shader::OPCODE_DEFB) 2237 { 2238 int index = instruction->dst.index; 2239 int boolean = instruction->src[0].boolean[0]; 2240 2241 setVertexShaderConstantB(index, &boolean); 2242 } 2243 } 2244 } 2245 loadConstants(const PixelShader * pixelShader)2246 void Renderer::loadConstants(const PixelShader *pixelShader) 2247 { 2248 if(!pixelShader) return; 2249 2250 size_t count = pixelShader->getLength(); 2251 2252 for(size_t i = 0; i < count; i++) 2253 { 2254 const Shader::Instruction *instruction = pixelShader->getInstruction(i); 2255 2256 if(instruction->opcode == Shader::OPCODE_DEF) 2257 { 2258 int index = instruction->dst.index; 2259 float value[4]; 2260 2261 value[0] = instruction->src[0].value[0]; 2262 value[1] = instruction->src[0].value[1]; 2263 value[2] = instruction->src[0].value[2]; 2264 value[3] = instruction->src[0].value[3]; 2265 2266 setPixelShaderConstantF(index, value); 2267 } 2268 else if(instruction->opcode == Shader::OPCODE_DEFI) 2269 { 2270 int index = instruction->dst.index; 2271 int integer[4]; 2272 2273 integer[0] = instruction->src[0].integer[0]; 2274 integer[1] = instruction->src[0].integer[1]; 2275 integer[2] = instruction->src[0].integer[2]; 2276 integer[3] = instruction->src[0].integer[3]; 2277 2278 setPixelShaderConstantI(index, integer); 2279 } 2280 else if(instruction->opcode == Shader::OPCODE_DEFB) 2281 { 2282 int index = instruction->dst.index; 2283 int boolean = instruction->src[0].boolean[0]; 2284 2285 setPixelShaderConstantB(index, &boolean); 2286 } 2287 } 2288 } 2289 setIndexBuffer(Resource * indexBuffer)2290 void Renderer::setIndexBuffer(Resource *indexBuffer) 2291 { 2292 context->indexBuffer = indexBuffer; 2293 } 2294 setMultiSampleMask(unsigned int mask)2295 void Renderer::setMultiSampleMask(unsigned int mask) 2296 { 2297 context->sampleMask = mask; 2298 } 2299 setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)2300 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) 2301 { 2302 sw::transparencyAntialiasing = transparencyAntialiasing; 2303 } 2304 isReadWriteTexture(int sampler)2305 bool Renderer::isReadWriteTexture(int sampler) 2306 { 2307 for(int index = 0; index < RENDERTARGETS; index++) 2308 { 2309 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource()) 2310 { 2311 return true; 2312 } 2313 } 2314 2315 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource()) 2316 { 2317 return true; 2318 } 2319 2320 return false; 2321 } 2322 updateClipper()2323 void Renderer::updateClipper() 2324 { 2325 if(updateClipPlanes) 2326 { 2327 if(VertexProcessor::isFixedFunction()) // User plane in world space 2328 { 2329 const Matrix &scissorWorld = getViewTransform(); 2330 2331 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0]; 2332 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1]; 2333 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2]; 2334 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3]; 2335 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4]; 2336 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5]; 2337 } 2338 else // User plane in clip space 2339 { 2340 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0]; 2341 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1]; 2342 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2]; 2343 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3]; 2344 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4]; 2345 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5]; 2346 } 2347 2348 updateClipPlanes = false; 2349 } 2350 } 2351 setTextureResource(unsigned int sampler,Resource * resource)2352 void Renderer::setTextureResource(unsigned int sampler, Resource *resource) 2353 { 2354 ASSERT(sampler < TOTAL_IMAGE_UNITS); 2355 2356 context->texture[sampler] = resource; 2357 } 2358 setTextureLevel(unsigned int sampler,unsigned int face,unsigned int level,Surface * surface,TextureType type)2359 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) 2360 { 2361 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); 2362 2363 context->sampler[sampler].setTextureLevel(face, level, surface, type); 2364 } 2365 setTextureFilter(SamplerType type,int sampler,FilterType textureFilter)2366 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) 2367 { 2368 if(type == SAMPLER_PIXEL) 2369 { 2370 PixelProcessor::setTextureFilter(sampler, textureFilter); 2371 } 2372 else 2373 { 2374 VertexProcessor::setTextureFilter(sampler, textureFilter); 2375 } 2376 } 2377 setMipmapFilter(SamplerType type,int sampler,MipmapType mipmapFilter)2378 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) 2379 { 2380 if(type == SAMPLER_PIXEL) 2381 { 2382 PixelProcessor::setMipmapFilter(sampler, mipmapFilter); 2383 } 2384 else 2385 { 2386 VertexProcessor::setMipmapFilter(sampler, mipmapFilter); 2387 } 2388 } 2389 setGatherEnable(SamplerType type,int sampler,bool enable)2390 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) 2391 { 2392 if(type == SAMPLER_PIXEL) 2393 { 2394 PixelProcessor::setGatherEnable(sampler, enable); 2395 } 2396 else 2397 { 2398 VertexProcessor::setGatherEnable(sampler, enable); 2399 } 2400 } 2401 setAddressingModeU(SamplerType type,int sampler,AddressingMode addressMode)2402 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) 2403 { 2404 if(type == SAMPLER_PIXEL) 2405 { 2406 PixelProcessor::setAddressingModeU(sampler, addressMode); 2407 } 2408 else 2409 { 2410 VertexProcessor::setAddressingModeU(sampler, addressMode); 2411 } 2412 } 2413 setAddressingModeV(SamplerType type,int sampler,AddressingMode addressMode)2414 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) 2415 { 2416 if(type == SAMPLER_PIXEL) 2417 { 2418 PixelProcessor::setAddressingModeV(sampler, addressMode); 2419 } 2420 else 2421 { 2422 VertexProcessor::setAddressingModeV(sampler, addressMode); 2423 } 2424 } 2425 setAddressingModeW(SamplerType type,int sampler,AddressingMode addressMode)2426 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) 2427 { 2428 if(type == SAMPLER_PIXEL) 2429 { 2430 PixelProcessor::setAddressingModeW(sampler, addressMode); 2431 } 2432 else 2433 { 2434 VertexProcessor::setAddressingModeW(sampler, addressMode); 2435 } 2436 } 2437 setReadSRGB(SamplerType type,int sampler,bool sRGB)2438 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) 2439 { 2440 if(type == SAMPLER_PIXEL) 2441 { 2442 PixelProcessor::setReadSRGB(sampler, sRGB); 2443 } 2444 else 2445 { 2446 VertexProcessor::setReadSRGB(sampler, sRGB); 2447 } 2448 } 2449 setMipmapLOD(SamplerType type,int sampler,float bias)2450 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) 2451 { 2452 if(type == SAMPLER_PIXEL) 2453 { 2454 PixelProcessor::setMipmapLOD(sampler, bias); 2455 } 2456 else 2457 { 2458 VertexProcessor::setMipmapLOD(sampler, bias); 2459 } 2460 } 2461 setBorderColor(SamplerType type,int sampler,const Color<float> & borderColor)2462 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) 2463 { 2464 if(type == SAMPLER_PIXEL) 2465 { 2466 PixelProcessor::setBorderColor(sampler, borderColor); 2467 } 2468 else 2469 { 2470 VertexProcessor::setBorderColor(sampler, borderColor); 2471 } 2472 } 2473 setMaxAnisotropy(SamplerType type,int sampler,float maxAnisotropy)2474 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) 2475 { 2476 if(type == SAMPLER_PIXEL) 2477 { 2478 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2479 } 2480 else 2481 { 2482 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2483 } 2484 } 2485 setHighPrecisionFiltering(SamplerType type,int sampler,bool highPrecisionFiltering)2486 void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering) 2487 { 2488 if(type == SAMPLER_PIXEL) 2489 { 2490 PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2491 } 2492 else 2493 { 2494 VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2495 } 2496 } 2497 setSwizzleR(SamplerType type,int sampler,SwizzleType swizzleR)2498 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) 2499 { 2500 if(type == SAMPLER_PIXEL) 2501 { 2502 PixelProcessor::setSwizzleR(sampler, swizzleR); 2503 } 2504 else 2505 { 2506 VertexProcessor::setSwizzleR(sampler, swizzleR); 2507 } 2508 } 2509 setSwizzleG(SamplerType type,int sampler,SwizzleType swizzleG)2510 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) 2511 { 2512 if(type == SAMPLER_PIXEL) 2513 { 2514 PixelProcessor::setSwizzleG(sampler, swizzleG); 2515 } 2516 else 2517 { 2518 VertexProcessor::setSwizzleG(sampler, swizzleG); 2519 } 2520 } 2521 setSwizzleB(SamplerType type,int sampler,SwizzleType swizzleB)2522 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) 2523 { 2524 if(type == SAMPLER_PIXEL) 2525 { 2526 PixelProcessor::setSwizzleB(sampler, swizzleB); 2527 } 2528 else 2529 { 2530 VertexProcessor::setSwizzleB(sampler, swizzleB); 2531 } 2532 } 2533 setSwizzleA(SamplerType type,int sampler,SwizzleType swizzleA)2534 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) 2535 { 2536 if(type == SAMPLER_PIXEL) 2537 { 2538 PixelProcessor::setSwizzleA(sampler, swizzleA); 2539 } 2540 else 2541 { 2542 VertexProcessor::setSwizzleA(sampler, swizzleA); 2543 } 2544 } 2545 setCompareFunc(SamplerType type,int sampler,CompareFunc compFunc)2546 void Renderer::setCompareFunc(SamplerType type, int sampler, CompareFunc compFunc) 2547 { 2548 if(type == SAMPLER_PIXEL) 2549 { 2550 PixelProcessor::setCompareFunc(sampler, compFunc); 2551 } 2552 else 2553 { 2554 VertexProcessor::setCompareFunc(sampler, compFunc); 2555 } 2556 } 2557 setBaseLevel(SamplerType type,int sampler,int baseLevel)2558 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel) 2559 { 2560 if(type == SAMPLER_PIXEL) 2561 { 2562 PixelProcessor::setBaseLevel(sampler, baseLevel); 2563 } 2564 else 2565 { 2566 VertexProcessor::setBaseLevel(sampler, baseLevel); 2567 } 2568 } 2569 setMaxLevel(SamplerType type,int sampler,int maxLevel)2570 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel) 2571 { 2572 if(type == SAMPLER_PIXEL) 2573 { 2574 PixelProcessor::setMaxLevel(sampler, maxLevel); 2575 } 2576 else 2577 { 2578 VertexProcessor::setMaxLevel(sampler, maxLevel); 2579 } 2580 } 2581 setMinLod(SamplerType type,int sampler,float minLod)2582 void Renderer::setMinLod(SamplerType type, int sampler, float minLod) 2583 { 2584 if(type == SAMPLER_PIXEL) 2585 { 2586 PixelProcessor::setMinLod(sampler, minLod); 2587 } 2588 else 2589 { 2590 VertexProcessor::setMinLod(sampler, minLod); 2591 } 2592 } 2593 setMaxLod(SamplerType type,int sampler,float maxLod)2594 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod) 2595 { 2596 if(type == SAMPLER_PIXEL) 2597 { 2598 PixelProcessor::setMaxLod(sampler, maxLod); 2599 } 2600 else 2601 { 2602 VertexProcessor::setMaxLod(sampler, maxLod); 2603 } 2604 } 2605 setSyncRequired(SamplerType type,int sampler,bool syncRequired)2606 void Renderer::setSyncRequired(SamplerType type, int sampler, bool syncRequired) 2607 { 2608 if(type == SAMPLER_PIXEL) 2609 { 2610 PixelProcessor::setSyncRequired(sampler, syncRequired); 2611 } 2612 else 2613 { 2614 VertexProcessor::setSyncRequired(sampler, syncRequired); 2615 } 2616 } 2617 setPointSpriteEnable(bool pointSpriteEnable)2618 void Renderer::setPointSpriteEnable(bool pointSpriteEnable) 2619 { 2620 context->setPointSpriteEnable(pointSpriteEnable); 2621 } 2622 setPointScaleEnable(bool pointScaleEnable)2623 void Renderer::setPointScaleEnable(bool pointScaleEnable) 2624 { 2625 context->setPointScaleEnable(pointScaleEnable); 2626 } 2627 setLineWidth(float width)2628 void Renderer::setLineWidth(float width) 2629 { 2630 context->lineWidth = width; 2631 } 2632 setDepthBias(float bias)2633 void Renderer::setDepthBias(float bias) 2634 { 2635 context->depthBias = bias; 2636 } 2637 setSlopeDepthBias(float slopeBias)2638 void Renderer::setSlopeDepthBias(float slopeBias) 2639 { 2640 context->slopeDepthBias = slopeBias; 2641 } 2642 setRasterizerDiscard(bool rasterizerDiscard)2643 void Renderer::setRasterizerDiscard(bool rasterizerDiscard) 2644 { 2645 context->rasterizerDiscard = rasterizerDiscard; 2646 } 2647 setPixelShader(const PixelShader * shader)2648 void Renderer::setPixelShader(const PixelShader *shader) 2649 { 2650 context->pixelShader = shader; 2651 2652 loadConstants(shader); 2653 } 2654 setVertexShader(const VertexShader * shader)2655 void Renderer::setVertexShader(const VertexShader *shader) 2656 { 2657 context->vertexShader = shader; 2658 2659 loadConstants(shader); 2660 } 2661 setPixelShaderConstantF(unsigned int index,const float value[4],unsigned int count)2662 void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2663 { 2664 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2665 { 2666 if(drawCall[i]->psDirtyConstF < index + count) 2667 { 2668 drawCall[i]->psDirtyConstF = index + count; 2669 } 2670 } 2671 2672 for(unsigned int i = 0; i < count; i++) 2673 { 2674 PixelProcessor::setFloatConstant(index + i, value); 2675 value += 4; 2676 } 2677 } 2678 setPixelShaderConstantI(unsigned int index,const int value[4],unsigned int count)2679 void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2680 { 2681 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2682 { 2683 if(drawCall[i]->psDirtyConstI < index + count) 2684 { 2685 drawCall[i]->psDirtyConstI = index + count; 2686 } 2687 } 2688 2689 for(unsigned int i = 0; i < count; i++) 2690 { 2691 PixelProcessor::setIntegerConstant(index + i, value); 2692 value += 4; 2693 } 2694 } 2695 setPixelShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2696 void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2697 { 2698 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2699 { 2700 if(drawCall[i]->psDirtyConstB < index + count) 2701 { 2702 drawCall[i]->psDirtyConstB = index + count; 2703 } 2704 } 2705 2706 for(unsigned int i = 0; i < count; i++) 2707 { 2708 PixelProcessor::setBooleanConstant(index + i, *boolean); 2709 boolean++; 2710 } 2711 } 2712 setVertexShaderConstantF(unsigned int index,const float value[4],unsigned int count)2713 void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2714 { 2715 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2716 { 2717 if(drawCall[i]->vsDirtyConstF < index + count) 2718 { 2719 drawCall[i]->vsDirtyConstF = index + count; 2720 } 2721 } 2722 2723 for(unsigned int i = 0; i < count; i++) 2724 { 2725 VertexProcessor::setFloatConstant(index + i, value); 2726 value += 4; 2727 } 2728 } 2729 setVertexShaderConstantI(unsigned int index,const int value[4],unsigned int count)2730 void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2731 { 2732 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2733 { 2734 if(drawCall[i]->vsDirtyConstI < index + count) 2735 { 2736 drawCall[i]->vsDirtyConstI = index + count; 2737 } 2738 } 2739 2740 for(unsigned int i = 0; i < count; i++) 2741 { 2742 VertexProcessor::setIntegerConstant(index + i, value); 2743 value += 4; 2744 } 2745 } 2746 setVertexShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2747 void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2748 { 2749 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2750 { 2751 if(drawCall[i]->vsDirtyConstB < index + count) 2752 { 2753 drawCall[i]->vsDirtyConstB = index + count; 2754 } 2755 } 2756 2757 for(unsigned int i = 0; i < count; i++) 2758 { 2759 VertexProcessor::setBooleanConstant(index + i, *boolean); 2760 boolean++; 2761 } 2762 } 2763 setModelMatrix(const Matrix & M,int i)2764 void Renderer::setModelMatrix(const Matrix &M, int i) 2765 { 2766 VertexProcessor::setModelMatrix(M, i); 2767 } 2768 setViewMatrix(const Matrix & V)2769 void Renderer::setViewMatrix(const Matrix &V) 2770 { 2771 VertexProcessor::setViewMatrix(V); 2772 updateClipPlanes = true; 2773 } 2774 setBaseMatrix(const Matrix & B)2775 void Renderer::setBaseMatrix(const Matrix &B) 2776 { 2777 VertexProcessor::setBaseMatrix(B); 2778 updateClipPlanes = true; 2779 } 2780 setProjectionMatrix(const Matrix & P)2781 void Renderer::setProjectionMatrix(const Matrix &P) 2782 { 2783 VertexProcessor::setProjectionMatrix(P); 2784 updateClipPlanes = true; 2785 } 2786 addQuery(Query * query)2787 void Renderer::addQuery(Query *query) 2788 { 2789 queries.push_back(query); 2790 } 2791 removeQuery(Query * query)2792 void Renderer::removeQuery(Query *query) 2793 { 2794 queries.remove(query); 2795 } 2796 2797 #if PERF_HUD getThreadCount()2798 int Renderer::getThreadCount() 2799 { 2800 return threadCount; 2801 } 2802 getVertexTime(int thread)2803 int64_t Renderer::getVertexTime(int thread) 2804 { 2805 return vertexTime[thread]; 2806 } 2807 getSetupTime(int thread)2808 int64_t Renderer::getSetupTime(int thread) 2809 { 2810 return setupTime[thread]; 2811 } 2812 getPixelTime(int thread)2813 int64_t Renderer::getPixelTime(int thread) 2814 { 2815 return pixelTime[thread]; 2816 } 2817 resetTimers()2818 void Renderer::resetTimers() 2819 { 2820 for(int thread = 0; thread < threadCount; thread++) 2821 { 2822 vertexTime[thread] = 0; 2823 setupTime[thread] = 0; 2824 pixelTime[thread] = 0; 2825 } 2826 } 2827 #endif 2828 setViewport(const Viewport & viewport)2829 void Renderer::setViewport(const Viewport &viewport) 2830 { 2831 this->viewport = viewport; 2832 } 2833 setScissor(const Rect & scissor)2834 void Renderer::setScissor(const Rect &scissor) 2835 { 2836 this->scissor = scissor; 2837 } 2838 setClipFlags(int flags)2839 void Renderer::setClipFlags(int flags) 2840 { 2841 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum 2842 } 2843 setClipPlane(unsigned int index,const float plane[4])2844 void Renderer::setClipPlane(unsigned int index, const float plane[4]) 2845 { 2846 if(index < MAX_CLIP_PLANES) 2847 { 2848 userPlane[index] = plane; 2849 } 2850 else ASSERT(false); 2851 2852 updateClipPlanes = true; 2853 } 2854 updateConfiguration(bool initialUpdate)2855 void Renderer::updateConfiguration(bool initialUpdate) 2856 { 2857 bool newConfiguration = swiftConfig->hasNewConfiguration(); 2858 2859 if(newConfiguration || initialUpdate) 2860 { 2861 terminateThreads(); 2862 2863 SwiftConfig::Configuration configuration = {}; 2864 swiftConfig->getConfiguration(configuration); 2865 2866 precacheVertex = !newConfiguration && configuration.precache; 2867 precacheSetup = !newConfiguration && configuration.precache; 2868 precachePixel = !newConfiguration && configuration.precache; 2869 2870 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); 2871 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); 2872 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); 2873 2874 switch(configuration.textureSampleQuality) 2875 { 2876 case 0: Sampler::setFilterQuality(FILTER_POINT); break; 2877 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; 2878 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2879 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2880 } 2881 2882 switch(configuration.mipmapQuality) 2883 { 2884 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; 2885 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2886 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2887 } 2888 2889 setPerspectiveCorrection(configuration.perspectiveCorrection); 2890 2891 switch(configuration.transcendentalPrecision) 2892 { 2893 case 0: 2894 logPrecision = APPROXIMATE; 2895 expPrecision = APPROXIMATE; 2896 rcpPrecision = APPROXIMATE; 2897 rsqPrecision = APPROXIMATE; 2898 break; 2899 case 1: 2900 logPrecision = PARTIAL; 2901 expPrecision = PARTIAL; 2902 rcpPrecision = PARTIAL; 2903 rsqPrecision = PARTIAL; 2904 break; 2905 case 2: 2906 logPrecision = ACCURATE; 2907 expPrecision = ACCURATE; 2908 rcpPrecision = ACCURATE; 2909 rsqPrecision = ACCURATE; 2910 break; 2911 case 3: 2912 logPrecision = WHQL; 2913 expPrecision = WHQL; 2914 rcpPrecision = WHQL; 2915 rsqPrecision = WHQL; 2916 break; 2917 case 4: 2918 logPrecision = IEEE; 2919 expPrecision = IEEE; 2920 rcpPrecision = IEEE; 2921 rsqPrecision = IEEE; 2922 break; 2923 default: 2924 logPrecision = ACCURATE; 2925 expPrecision = ACCURATE; 2926 rcpPrecision = ACCURATE; 2927 rsqPrecision = ACCURATE; 2928 break; 2929 } 2930 2931 switch(configuration.transparencyAntialiasing) 2932 { 2933 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2934 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; 2935 default: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2936 } 2937 2938 switch(configuration.threadCount) 2939 { 2940 case -1: threadCount = CPUID::coreCount(); break; 2941 case 0: threadCount = CPUID::processAffinity(); break; 2942 default: threadCount = configuration.threadCount; break; 2943 } 2944 2945 CPUID::setEnableSSE4_1(configuration.enableSSE4_1); 2946 CPUID::setEnableSSSE3(configuration.enableSSSE3); 2947 CPUID::setEnableSSE3(configuration.enableSSE3); 2948 CPUID::setEnableSSE2(configuration.enableSSE2); 2949 CPUID::setEnableSSE(configuration.enableSSE); 2950 2951 rr::Config::Edit cfg; 2952 cfg.clearOptimizationPasses(); 2953 for(auto pass : configuration.optimization) 2954 { 2955 if (pass != rr::Optimization::Pass::Disabled) { cfg.add(pass); } 2956 } 2957 rr::Nucleus::adjustDefaultConfig(cfg); 2958 2959 forceWindowed = configuration.forceWindowed; 2960 complementaryDepthBuffer = configuration.complementaryDepthBuffer; 2961 postBlendSRGB = configuration.postBlendSRGB; 2962 exactColorRounding = configuration.exactColorRounding; 2963 forceClearRegisters = configuration.forceClearRegisters; 2964 2965 #ifndef NDEBUG 2966 minPrimitives = configuration.minPrimitives; 2967 maxPrimitives = configuration.maxPrimitives; 2968 #endif 2969 } 2970 2971 if(!initialUpdate && !worker[0]) 2972 { 2973 initializeThreads(); 2974 } 2975 } 2976 } 2977