1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "Renderer.hpp" 16 17 #include "Clipper.hpp" 18 #include "Math.hpp" 19 #include "FrameBuffer.hpp" 20 #include "Timer.hpp" 21 #include "Surface.hpp" 22 #include "Half.hpp" 23 #include "Primitive.hpp" 24 #include "Polygon.hpp" 25 #include "SwiftConfig.hpp" 26 #include "MutexLock.hpp" 27 #include "CPUID.hpp" 28 #include "Memory.hpp" 29 #include "Resource.hpp" 30 #include "Constants.hpp" 31 #include "Debug.hpp" 32 #include "Reactor/Reactor.hpp" 33 34 #undef max 35 36 bool disableServer = true; 37 38 #ifndef NDEBUG 39 unsigned int minPrimitives = 1; 40 unsigned int maxPrimitives = 1 << 21; 41 #endif 42 43 namespace sw 44 { 45 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 46 extern bool symmetricNormalizedDepth; // [-1, 1] instead of [0, 1] 47 extern bool booleanFaceRegister; 48 extern bool fullPixelPositionRegister; 49 extern bool leadingVertexFirst; // Flat shading uses first vertex, else last 50 extern bool secondaryColor; // Specular lighting is applied after texturing 51 extern bool colorsDefaultToZero; 52 53 extern bool forceWindowed; 54 extern bool complementaryDepthBuffer; 55 extern bool postBlendSRGB; 56 extern bool exactColorRounding; 57 extern TransparencyAntialiasing transparencyAntialiasing; 58 extern bool forceClearRegisters; 59 60 extern bool precacheVertex; 61 extern bool precacheSetup; 62 extern bool precachePixel; 63 64 int batchSize = 128; 65 int threadCount = 1; 66 int unitCount = 1; 67 int clusterCount = 1; 68 69 TranscendentalPrecision logPrecision = ACCURATE; 70 TranscendentalPrecision expPrecision = ACCURATE; 71 TranscendentalPrecision rcpPrecision = ACCURATE; 72 TranscendentalPrecision rsqPrecision = ACCURATE; 73 bool perspectiveCorrection = true; 74 75 struct Parameters 76 { 77 Renderer *renderer; 78 int threadIndex; 79 }; 80 DrawCall()81 DrawCall::DrawCall() 82 { 83 queries = 0; 84 85 vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 86 vsDirtyConstI = 16; 87 vsDirtyConstB = 16; 88 89 psDirtyConstF = FRAGMENT_UNIFORM_VECTORS; 90 psDirtyConstI = 16; 91 psDirtyConstB = 16; 92 93 references = -1; 94 95 data = (DrawData*)allocate(sizeof(DrawData)); 96 data->constants = &constants; 97 } 98 ~DrawCall()99 DrawCall::~DrawCall() 100 { 101 delete queries; 102 103 deallocate(data); 104 } 105 Renderer(Context * context,Conventions conventions,bool exactColorRounding)106 Renderer::Renderer(Context *context, Conventions conventions, bool exactColorRounding) : VertexProcessor(context), PixelProcessor(context), SetupProcessor(context), context(context), viewport() 107 { 108 sw::halfIntegerCoordinates = conventions.halfIntegerCoordinates; 109 sw::symmetricNormalizedDepth = conventions.symmetricNormalizedDepth; 110 sw::booleanFaceRegister = conventions.booleanFaceRegister; 111 sw::fullPixelPositionRegister = conventions.fullPixelPositionRegister; 112 sw::leadingVertexFirst = conventions.leadingVertexFirst; 113 sw::secondaryColor = conventions.secondaryColor; 114 sw::colorsDefaultToZero = conventions.colorsDefaultToZero; 115 sw::exactColorRounding = exactColorRounding; 116 117 setRenderTarget(0, 0); 118 clipper = new Clipper(symmetricNormalizedDepth); 119 blitter = new Blitter; 120 121 updateViewMatrix = true; 122 updateBaseMatrix = true; 123 updateProjectionMatrix = true; 124 updateClipPlanes = true; 125 126 #if PERF_HUD 127 resetTimers(); 128 #endif 129 130 for(int i = 0; i < 16; i++) 131 { 132 vertexTask[i] = 0; 133 134 worker[i] = 0; 135 resume[i] = 0; 136 suspend[i] = 0; 137 } 138 139 threadsAwake = 0; 140 resumeApp = new Event(); 141 142 currentDraw = 0; 143 nextDraw = 0; 144 145 qHead = 0; 146 qSize = 0; 147 148 for(int i = 0; i < 16; i++) 149 { 150 triangleBatch[i] = 0; 151 primitiveBatch[i] = 0; 152 } 153 154 for(int draw = 0; draw < DRAW_COUNT; draw++) 155 { 156 drawCall[draw] = new DrawCall(); 157 drawList[draw] = drawCall[draw]; 158 } 159 160 for(int unit = 0; unit < 16; unit++) 161 { 162 primitiveProgress[unit].init(); 163 } 164 165 for(int cluster = 0; cluster < 16; cluster++) 166 { 167 pixelProgress[cluster].init(); 168 } 169 170 clipFlags = 0; 171 172 swiftConfig = new SwiftConfig(disableServer); 173 updateConfiguration(true); 174 175 sync = new Resource(0); 176 } 177 ~Renderer()178 Renderer::~Renderer() 179 { 180 sync->destruct(); 181 182 delete clipper; 183 clipper = nullptr; 184 185 delete blitter; 186 blitter = nullptr; 187 188 terminateThreads(); 189 delete resumeApp; 190 191 for(int draw = 0; draw < DRAW_COUNT; draw++) 192 { 193 delete drawCall[draw]; 194 } 195 196 delete swiftConfig; 197 } 198 199 // This object has to be mem aligned operator new(size_t size)200 void* Renderer::operator new(size_t size) 201 { 202 ASSERT(size == sizeof(Renderer)); // This operator can't be called from a derived class 203 return sw::allocate(sizeof(Renderer), 16); 204 } 205 operator delete(void * mem)206 void Renderer::operator delete(void * mem) 207 { 208 sw::deallocate(mem); 209 } 210 draw(DrawType drawType,unsigned int indexOffset,unsigned int count,bool update)211 void Renderer::draw(DrawType drawType, unsigned int indexOffset, unsigned int count, bool update) 212 { 213 #ifndef NDEBUG 214 if(count < minPrimitives || count > maxPrimitives) 215 { 216 return; 217 } 218 #endif 219 220 context->drawType = drawType; 221 222 updateConfiguration(); 223 updateClipper(); 224 225 int ss = context->getSuperSampleCount(); 226 int ms = context->getMultiSampleCount(); 227 228 for(int q = 0; q < ss; q++) 229 { 230 unsigned int oldMultiSampleMask = context->multiSampleMask; 231 context->multiSampleMask = (context->sampleMask >> (ms * q)) & ((unsigned)0xFFFFFFFF >> (32 - ms)); 232 233 if(!context->multiSampleMask) 234 { 235 continue; 236 } 237 238 sync->lock(sw::PRIVATE); 239 240 if(update || oldMultiSampleMask != context->multiSampleMask) 241 { 242 vertexState = VertexProcessor::update(drawType); 243 setupState = SetupProcessor::update(); 244 pixelState = PixelProcessor::update(); 245 246 vertexRoutine = VertexProcessor::routine(vertexState); 247 setupRoutine = SetupProcessor::routine(setupState); 248 pixelRoutine = PixelProcessor::routine(pixelState); 249 } 250 251 int batch = batchSize / ms; 252 253 int (Renderer::*setupPrimitives)(int batch, int count); 254 255 if(context->isDrawTriangle()) 256 { 257 switch(context->fillMode) 258 { 259 case FILL_SOLID: 260 setupPrimitives = &Renderer::setupSolidTriangles; 261 break; 262 case FILL_WIREFRAME: 263 setupPrimitives = &Renderer::setupWireframeTriangle; 264 batch = 1; 265 break; 266 case FILL_VERTEX: 267 setupPrimitives = &Renderer::setupVertexTriangle; 268 batch = 1; 269 break; 270 default: 271 ASSERT(false); 272 return; 273 } 274 } 275 else if(context->isDrawLine()) 276 { 277 setupPrimitives = &Renderer::setupLines; 278 } 279 else // Point draw 280 { 281 setupPrimitives = &Renderer::setupPoints; 282 } 283 284 DrawCall *draw = 0; 285 286 do 287 { 288 for(int i = 0; i < DRAW_COUNT; i++) 289 { 290 if(drawCall[i]->references == -1) 291 { 292 draw = drawCall[i]; 293 drawList[nextDraw % DRAW_COUNT] = draw; 294 295 break; 296 } 297 } 298 299 if(!draw) 300 { 301 resumeApp->wait(); 302 } 303 } 304 while(!draw); 305 306 DrawData *data = draw->data; 307 308 if(queries.size() != 0) 309 { 310 draw->queries = new std::list<Query*>(); 311 bool includePrimitivesWrittenQueries = vertexState.transformFeedbackQueryEnabled && vertexState.transformFeedbackEnabled; 312 for(std::list<Query*>::iterator query = queries.begin(); query != queries.end(); query++) 313 { 314 Query* q = *query; 315 if(includePrimitivesWrittenQueries || (q->type != Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN)) 316 { 317 atomicIncrement(&(q->reference)); 318 draw->queries->push_back(q); 319 } 320 } 321 } 322 323 draw->drawType = drawType; 324 draw->batchSize = batch; 325 326 vertexRoutine->bind(); 327 setupRoutine->bind(); 328 pixelRoutine->bind(); 329 330 draw->vertexRoutine = vertexRoutine; 331 draw->setupRoutine = setupRoutine; 332 draw->pixelRoutine = pixelRoutine; 333 draw->vertexPointer = (VertexProcessor::RoutinePointer)vertexRoutine->getEntry(); 334 draw->setupPointer = (SetupProcessor::RoutinePointer)setupRoutine->getEntry(); 335 draw->pixelPointer = (PixelProcessor::RoutinePointer)pixelRoutine->getEntry(); 336 draw->setupPrimitives = setupPrimitives; 337 draw->setupState = setupState; 338 339 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 340 { 341 draw->vertexStream[i] = context->input[i].resource; 342 data->input[i] = context->input[i].buffer; 343 data->stride[i] = context->input[i].stride; 344 345 if(draw->vertexStream[i]) 346 { 347 draw->vertexStream[i]->lock(PUBLIC, PRIVATE); 348 } 349 } 350 351 if(context->indexBuffer) 352 { 353 data->indices = (unsigned char*)context->indexBuffer->lock(PUBLIC, PRIVATE) + indexOffset; 354 } 355 356 draw->indexBuffer = context->indexBuffer; 357 358 for(int sampler = 0; sampler < TOTAL_IMAGE_UNITS; sampler++) 359 { 360 draw->texture[sampler] = 0; 361 } 362 363 for(int sampler = 0; sampler < TEXTURE_IMAGE_UNITS; sampler++) 364 { 365 if(pixelState.sampler[sampler].textureType != TEXTURE_NULL) 366 { 367 draw->texture[sampler] = context->texture[sampler]; 368 draw->texture[sampler]->lock(PUBLIC, isReadWriteTexture(sampler) ? MANAGED : PRIVATE); // If the texure is both read and written, use the same read/write lock as render targets 369 370 data->mipmap[sampler] = context->sampler[sampler].getTextureData(); 371 } 372 } 373 374 if(context->pixelShader) 375 { 376 if(draw->psDirtyConstF) 377 { 378 memcpy(&data->ps.cW, PixelProcessor::cW, sizeof(word4) * 4 * (draw->psDirtyConstF < 8 ? draw->psDirtyConstF : 8)); 379 memcpy(&data->ps.c, PixelProcessor::c, sizeof(float4) * draw->psDirtyConstF); 380 draw->psDirtyConstF = 0; 381 } 382 383 if(draw->psDirtyConstI) 384 { 385 memcpy(&data->ps.i, PixelProcessor::i, sizeof(int4) * draw->psDirtyConstI); 386 draw->psDirtyConstI = 0; 387 } 388 389 if(draw->psDirtyConstB) 390 { 391 memcpy(&data->ps.b, PixelProcessor::b, sizeof(bool) * draw->psDirtyConstB); 392 draw->psDirtyConstB = 0; 393 } 394 395 PixelProcessor::lockUniformBuffers(data->ps.u, draw->pUniformBuffers); 396 } 397 else 398 { 399 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 400 { 401 draw->pUniformBuffers[i] = nullptr; 402 } 403 } 404 405 if(context->pixelShaderVersion() <= 0x0104) 406 { 407 for(int stage = 0; stage < 8; stage++) 408 { 409 if(pixelState.textureStage[stage].stageOperation != TextureStage::STAGE_DISABLE || context->pixelShader) 410 { 411 data->textureStage[stage] = context->textureStage[stage].uniforms; 412 } 413 else break; 414 } 415 } 416 417 if(context->vertexShader) 418 { 419 if(context->vertexShader->getVersion() >= 0x0300) 420 { 421 for(int sampler = 0; sampler < VERTEX_TEXTURE_IMAGE_UNITS; sampler++) 422 { 423 if(vertexState.samplerState[sampler].textureType != TEXTURE_NULL) 424 { 425 draw->texture[TEXTURE_IMAGE_UNITS + sampler] = context->texture[TEXTURE_IMAGE_UNITS + sampler]; 426 draw->texture[TEXTURE_IMAGE_UNITS + sampler]->lock(PUBLIC, PRIVATE); 427 428 data->mipmap[TEXTURE_IMAGE_UNITS + sampler] = context->sampler[TEXTURE_IMAGE_UNITS + sampler].getTextureData(); 429 } 430 } 431 } 432 433 if(draw->vsDirtyConstF) 434 { 435 memcpy(&data->vs.c, VertexProcessor::c, sizeof(float4) * draw->vsDirtyConstF); 436 draw->vsDirtyConstF = 0; 437 } 438 439 if(draw->vsDirtyConstI) 440 { 441 memcpy(&data->vs.i, VertexProcessor::i, sizeof(int4) * draw->vsDirtyConstI); 442 draw->vsDirtyConstI = 0; 443 } 444 445 if(draw->vsDirtyConstB) 446 { 447 memcpy(&data->vs.b, VertexProcessor::b, sizeof(bool) * draw->vsDirtyConstB); 448 draw->vsDirtyConstB = 0; 449 } 450 451 if(context->vertexShader->isInstanceIdDeclared()) 452 { 453 data->instanceID = context->instanceID; 454 } 455 456 VertexProcessor::lockUniformBuffers(data->vs.u, draw->vUniformBuffers); 457 VertexProcessor::lockTransformFeedbackBuffers(data->vs.t, data->vs.reg, data->vs.row, data->vs.col, data->vs.str, draw->transformFeedbackBuffers); 458 } 459 else 460 { 461 data->ff = ff; 462 463 draw->vsDirtyConstF = VERTEX_UNIFORM_VECTORS + 1; 464 draw->vsDirtyConstI = 16; 465 draw->vsDirtyConstB = 16; 466 467 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 468 { 469 draw->vUniformBuffers[i] = nullptr; 470 } 471 472 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 473 { 474 draw->transformFeedbackBuffers[i] = nullptr; 475 } 476 } 477 478 if(pixelState.stencilActive) 479 { 480 data->stencil[0] = stencil; 481 data->stencil[1] = stencilCCW; 482 } 483 484 if(pixelState.fogActive) 485 { 486 data->fog = fog; 487 } 488 489 if(setupState.isDrawPoint) 490 { 491 data->point = point; 492 } 493 494 data->lineWidth = context->lineWidth; 495 496 data->factor = factor; 497 498 if(pixelState.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 499 { 500 float ref = context->alphaReference * (1.0f / 255.0f); 501 float margin = sw::min(ref, 1.0f - ref); 502 503 if(ms == 4) 504 { 505 data->a2c0 = replicate(ref - margin * 0.6f); 506 data->a2c1 = replicate(ref - margin * 0.2f); 507 data->a2c2 = replicate(ref + margin * 0.2f); 508 data->a2c3 = replicate(ref + margin * 0.6f); 509 } 510 else if(ms == 2) 511 { 512 data->a2c0 = replicate(ref - margin * 0.3f); 513 data->a2c1 = replicate(ref + margin * 0.3f); 514 } 515 else ASSERT(false); 516 } 517 518 if(pixelState.occlusionEnabled) 519 { 520 for(int cluster = 0; cluster < clusterCount; cluster++) 521 { 522 data->occlusion[cluster] = 0; 523 } 524 } 525 526 #if PERF_PROFILE 527 for(int cluster = 0; cluster < clusterCount; cluster++) 528 { 529 for(int i = 0; i < PERF_TIMERS; i++) 530 { 531 data->cycles[i][cluster] = 0; 532 } 533 } 534 #endif 535 536 // Viewport 537 { 538 float W = 0.5f * viewport.width; 539 float H = 0.5f * viewport.height; 540 float X0 = viewport.x0 + W; 541 float Y0 = viewport.y0 + H; 542 float N = viewport.minZ; 543 float F = viewport.maxZ; 544 float Z = F - N; 545 546 if(context->isDrawTriangle(false)) 547 { 548 N += depthBias; 549 } 550 551 if(complementaryDepthBuffer) 552 { 553 Z = -Z; 554 N = 1 - N; 555 } 556 557 static const float X[5][16] = // Fragment offsets 558 { 559 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 560 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 561 {-0.3000f, +0.1000f, +0.3000f, -0.1000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 562 {+0.1875f, -0.3125f, +0.3125f, -0.4375f, -0.0625f, +0.4375f, +0.0625f, -0.1875f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 563 {+0.2553f, -0.1155f, +0.1661f, -0.1828f, +0.2293f, -0.4132f, -0.1773f, -0.0577f, +0.3891f, -0.4656f, +0.4103f, +0.4248f, -0.2109f, +0.3966f, -0.2664f, -0.3872f} // 16 samples 564 }; 565 566 static const float Y[5][16] = // Fragment offsets 567 { 568 {+0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 1 sample 569 {-0.2500f, +0.2500f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 2 samples 570 {-0.1000f, -0.3000f, +0.1000f, +0.3000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 4 samples 571 {-0.4375f, -0.3125f, -0.1875f, -0.0625f, +0.0625f, +0.1875f, +0.3125f, +0.4375f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f, +0.0000f}, // 8 samples 572 {-0.4503f, +0.1883f, +0.3684f, -0.4668f, -0.0690f, -0.1315f, +0.4999f, +0.0728f, +0.1070f, -0.3086f, +0.3725f, -0.1547f, -0.1102f, -0.3588f, +0.1789f, +0.0269f} // 16 samples 573 }; 574 575 int s = sw::log2(ss); 576 577 data->Wx16 = replicate(W * 16); 578 data->Hx16 = replicate(H * 16); 579 data->X0x16 = replicate(X0 * 16 - 8); 580 data->Y0x16 = replicate(Y0 * 16 - 8); 581 data->XXXX = replicate(X[s][q] / W); 582 data->YYYY = replicate(Y[s][q] / H); 583 data->halfPixelX = replicate(0.5f / W); 584 data->halfPixelY = replicate(0.5f / H); 585 data->viewportHeight = abs(viewport.height); 586 data->slopeDepthBias = slopeDepthBias; 587 data->depthRange = Z; 588 data->depthNear = N; 589 draw->clipFlags = clipFlags; 590 591 if(clipFlags) 592 { 593 if(clipFlags & Clipper::CLIP_PLANE0) data->clipPlane[0] = clipPlane[0]; 594 if(clipFlags & Clipper::CLIP_PLANE1) data->clipPlane[1] = clipPlane[1]; 595 if(clipFlags & Clipper::CLIP_PLANE2) data->clipPlane[2] = clipPlane[2]; 596 if(clipFlags & Clipper::CLIP_PLANE3) data->clipPlane[3] = clipPlane[3]; 597 if(clipFlags & Clipper::CLIP_PLANE4) data->clipPlane[4] = clipPlane[4]; 598 if(clipFlags & Clipper::CLIP_PLANE5) data->clipPlane[5] = clipPlane[5]; 599 } 600 } 601 602 // Target 603 { 604 for(int index = 0; index < RENDERTARGETS; index++) 605 { 606 draw->renderTarget[index] = context->renderTarget[index]; 607 608 if(draw->renderTarget[index]) 609 { 610 data->colorBuffer[index] = (unsigned int*)context->renderTarget[index]->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED); 611 data->colorPitchB[index] = context->renderTarget[index]->getInternalPitchB(); 612 data->colorSliceB[index] = context->renderTarget[index]->getInternalSliceB(); 613 } 614 } 615 616 draw->depthBuffer = context->depthBuffer; 617 draw->stencilBuffer = context->stencilBuffer; 618 619 if(draw->depthBuffer) 620 { 621 data->depthBuffer = (float*)context->depthBuffer->lockInternal(0, 0, q * ms, LOCK_READWRITE, MANAGED); 622 data->depthPitchB = context->depthBuffer->getInternalPitchB(); 623 data->depthSliceB = context->depthBuffer->getInternalSliceB(); 624 } 625 626 if(draw->stencilBuffer) 627 { 628 data->stencilBuffer = (unsigned char*)context->stencilBuffer->lockStencil(0, 0, q * ms, MANAGED); 629 data->stencilPitchB = context->stencilBuffer->getStencilPitchB(); 630 data->stencilSliceB = context->stencilBuffer->getStencilSliceB(); 631 } 632 } 633 634 // Scissor 635 { 636 data->scissorX0 = scissor.x0; 637 data->scissorX1 = scissor.x1; 638 data->scissorY0 = scissor.y0; 639 data->scissorY1 = scissor.y1; 640 } 641 642 draw->primitive = 0; 643 draw->count = count; 644 645 draw->references = (count + batch - 1) / batch; 646 647 schedulerMutex.lock(); 648 nextDraw++; 649 schedulerMutex.unlock(); 650 651 #ifndef NDEBUG 652 if(threadCount == 1) // Use main thread for draw execution 653 { 654 threadsAwake = 1; 655 task[0].type = Task::RESUME; 656 657 taskLoop(0); 658 } 659 else 660 #endif 661 { 662 if(!threadsAwake) 663 { 664 suspend[0]->wait(); 665 666 threadsAwake = 1; 667 task[0].type = Task::RESUME; 668 669 resume[0]->signal(); 670 } 671 } 672 } 673 } 674 clear(void * value,Format format,Surface * dest,const Rect & clearRect,unsigned int rgbaMask)675 void Renderer::clear(void *value, Format format, Surface *dest, const Rect &clearRect, unsigned int rgbaMask) 676 { 677 SliceRect rect = clearRect; 678 int samples = dest->getDepth(); 679 680 for(rect.slice = 0; rect.slice < samples; rect.slice++) 681 { 682 blitter->clear(value, format, dest, rect, rgbaMask); 683 } 684 } 685 blit(Surface * source,const SliceRect & sRect,Surface * dest,const SliceRect & dRect,bool filter,bool isStencil)686 void Renderer::blit(Surface *source, const SliceRect &sRect, Surface *dest, const SliceRect &dRect, bool filter, bool isStencil) 687 { 688 blitter->blit(source, sRect, dest, dRect, filter, isStencil); 689 } 690 blit3D(Surface * source,Surface * dest)691 void Renderer::blit3D(Surface *source, Surface *dest) 692 { 693 blitter->blit3D(source, dest); 694 } 695 threadFunction(void * parameters)696 void Renderer::threadFunction(void *parameters) 697 { 698 Renderer *renderer = static_cast<Parameters*>(parameters)->renderer; 699 int threadIndex = static_cast<Parameters*>(parameters)->threadIndex; 700 701 if(logPrecision < IEEE) 702 { 703 CPUID::setFlushToZero(true); 704 CPUID::setDenormalsAreZero(true); 705 } 706 707 renderer->threadLoop(threadIndex); 708 } 709 threadLoop(int threadIndex)710 void Renderer::threadLoop(int threadIndex) 711 { 712 while(!exitThreads) 713 { 714 taskLoop(threadIndex); 715 716 suspend[threadIndex]->signal(); 717 resume[threadIndex]->wait(); 718 } 719 } 720 taskLoop(int threadIndex)721 void Renderer::taskLoop(int threadIndex) 722 { 723 while(task[threadIndex].type != Task::SUSPEND) 724 { 725 scheduleTask(threadIndex); 726 executeTask(threadIndex); 727 } 728 } 729 findAvailableTasks()730 void Renderer::findAvailableTasks() 731 { 732 // Find pixel tasks 733 for(int cluster = 0; cluster < clusterCount; cluster++) 734 { 735 if(!pixelProgress[cluster].executing) 736 { 737 for(int unit = 0; unit < unitCount; unit++) 738 { 739 if(primitiveProgress[unit].references > 0) // Contains processed primitives 740 { 741 if(pixelProgress[cluster].drawCall == primitiveProgress[unit].drawCall) 742 { 743 if(pixelProgress[cluster].processedPrimitives == primitiveProgress[unit].firstPrimitive) // Previous primitives have been rendered 744 { 745 Task &task = taskQueue[qHead]; 746 task.type = Task::PIXELS; 747 task.primitiveUnit = unit; 748 task.pixelCluster = cluster; 749 750 pixelProgress[cluster].executing = true; 751 752 // Commit to the task queue 753 qHead = (qHead + 1) % 32; 754 qSize++; 755 756 break; 757 } 758 } 759 } 760 } 761 } 762 } 763 764 // Find primitive tasks 765 if(currentDraw == nextDraw) 766 { 767 return; // No more primitives to process 768 } 769 770 for(int unit = 0; unit < unitCount; unit++) 771 { 772 DrawCall *draw = drawList[currentDraw % DRAW_COUNT]; 773 774 if(draw->primitive >= draw->count) 775 { 776 currentDraw++; 777 778 if(currentDraw == nextDraw) 779 { 780 return; // No more primitives to process 781 } 782 783 draw = drawList[currentDraw % DRAW_COUNT]; 784 } 785 786 if(!primitiveProgress[unit].references) // Task not already being executed and not still in use by a pixel unit 787 { 788 int primitive = draw->primitive; 789 int count = draw->count; 790 int batch = draw->batchSize; 791 792 primitiveProgress[unit].drawCall = currentDraw; 793 primitiveProgress[unit].firstPrimitive = primitive; 794 primitiveProgress[unit].primitiveCount = count - primitive >= batch ? batch : count - primitive; 795 796 draw->primitive += batch; 797 798 Task &task = taskQueue[qHead]; 799 task.type = Task::PRIMITIVES; 800 task.primitiveUnit = unit; 801 802 primitiveProgress[unit].references = -1; 803 804 // Commit to the task queue 805 qHead = (qHead + 1) % 32; 806 qSize++; 807 } 808 } 809 } 810 scheduleTask(int threadIndex)811 void Renderer::scheduleTask(int threadIndex) 812 { 813 schedulerMutex.lock(); 814 815 if((int)qSize < threadCount - threadsAwake + 1) 816 { 817 findAvailableTasks(); 818 } 819 820 if(qSize != 0) 821 { 822 task[threadIndex] = taskQueue[(qHead - qSize) % 32]; 823 qSize--; 824 825 if(threadsAwake != threadCount) 826 { 827 int wakeup = qSize - threadsAwake + 1; 828 829 for(int i = 0; i < threadCount && wakeup > 0; i++) 830 { 831 if(task[i].type == Task::SUSPEND) 832 { 833 suspend[i]->wait(); 834 task[i].type = Task::RESUME; 835 resume[i]->signal(); 836 837 threadsAwake++; 838 wakeup--; 839 } 840 } 841 } 842 } 843 else 844 { 845 task[threadIndex].type = Task::SUSPEND; 846 847 threadsAwake--; 848 } 849 850 schedulerMutex.unlock(); 851 } 852 executeTask(int threadIndex)853 void Renderer::executeTask(int threadIndex) 854 { 855 #if PERF_HUD 856 int64_t startTick = Timer::ticks(); 857 #endif 858 859 switch(task[threadIndex].type) 860 { 861 case Task::PRIMITIVES: 862 { 863 int unit = task[threadIndex].primitiveUnit; 864 865 int input = primitiveProgress[unit].firstPrimitive; 866 int count = primitiveProgress[unit].primitiveCount; 867 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 868 int (Renderer::*setupPrimitives)(int batch, int count) = draw->setupPrimitives; 869 870 processPrimitiveVertices(unit, input, count, draw->count, threadIndex); 871 872 #if PERF_HUD 873 int64_t time = Timer::ticks(); 874 vertexTime[threadIndex] += time - startTick; 875 startTick = time; 876 #endif 877 878 int visible = 0; 879 880 if(!draw->setupState.rasterizerDiscard) 881 { 882 visible = (this->*setupPrimitives)(unit, count); 883 } 884 885 primitiveProgress[unit].visible = visible; 886 primitiveProgress[unit].references = clusterCount; 887 888 #if PERF_HUD 889 setupTime[threadIndex] += Timer::ticks() - startTick; 890 #endif 891 } 892 break; 893 case Task::PIXELS: 894 { 895 int unit = task[threadIndex].primitiveUnit; 896 int visible = primitiveProgress[unit].visible; 897 898 if(visible > 0) 899 { 900 int cluster = task[threadIndex].pixelCluster; 901 Primitive *primitive = primitiveBatch[unit]; 902 DrawCall *draw = drawList[pixelProgress[cluster].drawCall % DRAW_COUNT]; 903 DrawData *data = draw->data; 904 PixelProcessor::RoutinePointer pixelRoutine = draw->pixelPointer; 905 906 pixelRoutine(primitive, visible, cluster, data); 907 } 908 909 finishRendering(task[threadIndex]); 910 911 #if PERF_HUD 912 pixelTime[threadIndex] += Timer::ticks() - startTick; 913 #endif 914 } 915 break; 916 case Task::RESUME: 917 break; 918 case Task::SUSPEND: 919 break; 920 default: 921 ASSERT(false); 922 } 923 } 924 synchronize()925 void Renderer::synchronize() 926 { 927 sync->lock(sw::PUBLIC); 928 sync->unlock(); 929 } 930 finishRendering(Task & pixelTask)931 void Renderer::finishRendering(Task &pixelTask) 932 { 933 int unit = pixelTask.primitiveUnit; 934 int cluster = pixelTask.pixelCluster; 935 936 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 937 DrawData &data = *draw.data; 938 int primitive = primitiveProgress[unit].firstPrimitive; 939 int count = primitiveProgress[unit].primitiveCount; 940 int processedPrimitives = primitive + count; 941 942 pixelProgress[cluster].processedPrimitives = processedPrimitives; 943 944 if(pixelProgress[cluster].processedPrimitives >= draw.count) 945 { 946 pixelProgress[cluster].drawCall++; 947 pixelProgress[cluster].processedPrimitives = 0; 948 } 949 950 int ref = atomicDecrement(&primitiveProgress[unit].references); 951 952 if(ref == 0) 953 { 954 ref = atomicDecrement(&draw.references); 955 956 if(ref == 0) 957 { 958 #if PERF_PROFILE 959 for(int cluster = 0; cluster < clusterCount; cluster++) 960 { 961 for(int i = 0; i < PERF_TIMERS; i++) 962 { 963 profiler.cycles[i] += data.cycles[i][cluster]; 964 } 965 } 966 #endif 967 968 if(draw.queries) 969 { 970 for(std::list<Query*>::iterator q = draw.queries->begin(); q != draw.queries->end(); q++) 971 { 972 Query *query = *q; 973 974 switch(query->type) 975 { 976 case Query::FRAGMENTS_PASSED: 977 for(int cluster = 0; cluster < clusterCount; cluster++) 978 { 979 atomicAdd((volatile int*)&query->data, data.occlusion[cluster]); 980 } 981 break; 982 case Query::TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: 983 atomicAdd((volatile int*)&query->data, processedPrimitives); 984 break; 985 default: 986 break; 987 } 988 989 atomicDecrement(&query->reference); 990 } 991 992 delete draw.queries; 993 draw.queries = 0; 994 } 995 996 for(int i = 0; i < RENDERTARGETS; i++) 997 { 998 if(draw.renderTarget[i]) 999 { 1000 draw.renderTarget[i]->unlockInternal(); 1001 } 1002 } 1003 1004 if(draw.depthBuffer) 1005 { 1006 draw.depthBuffer->unlockInternal(); 1007 } 1008 1009 if(draw.stencilBuffer) 1010 { 1011 draw.stencilBuffer->unlockStencil(); 1012 } 1013 1014 for(int i = 0; i < TOTAL_IMAGE_UNITS; i++) 1015 { 1016 if(draw.texture[i]) 1017 { 1018 draw.texture[i]->unlock(); 1019 } 1020 } 1021 1022 for(int i = 0; i < MAX_VERTEX_INPUTS; i++) 1023 { 1024 if(draw.vertexStream[i]) 1025 { 1026 draw.vertexStream[i]->unlock(); 1027 } 1028 } 1029 1030 if(draw.indexBuffer) 1031 { 1032 draw.indexBuffer->unlock(); 1033 } 1034 1035 for(int i = 0; i < MAX_UNIFORM_BUFFER_BINDINGS; i++) 1036 { 1037 if(draw.pUniformBuffers[i]) 1038 { 1039 draw.pUniformBuffers[i]->unlock(); 1040 } 1041 if(draw.vUniformBuffers[i]) 1042 { 1043 draw.vUniformBuffers[i]->unlock(); 1044 } 1045 } 1046 1047 for(int i = 0; i < MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS; i++) 1048 { 1049 if(draw.transformFeedbackBuffers[i]) 1050 { 1051 draw.transformFeedbackBuffers[i]->unlock(); 1052 } 1053 } 1054 1055 draw.vertexRoutine->unbind(); 1056 draw.setupRoutine->unbind(); 1057 draw.pixelRoutine->unbind(); 1058 1059 sync->unlock(); 1060 1061 draw.references = -1; 1062 resumeApp->signal(); 1063 } 1064 } 1065 1066 pixelProgress[cluster].executing = false; 1067 } 1068 processPrimitiveVertices(int unit,unsigned int start,unsigned int triangleCount,unsigned int loop,int thread)1069 void Renderer::processPrimitiveVertices(int unit, unsigned int start, unsigned int triangleCount, unsigned int loop, int thread) 1070 { 1071 Triangle *triangle = triangleBatch[unit]; 1072 DrawCall *draw = drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1073 DrawData *data = draw->data; 1074 VertexTask *task = vertexTask[thread]; 1075 1076 const void *indices = data->indices; 1077 VertexProcessor::RoutinePointer vertexRoutine = draw->vertexPointer; 1078 1079 if(task->vertexCache.drawCall != primitiveProgress[unit].drawCall) 1080 { 1081 task->vertexCache.clear(); 1082 task->vertexCache.drawCall = primitiveProgress[unit].drawCall; 1083 } 1084 1085 unsigned int batch[128][3]; // FIXME: Adjust to dynamic batch size 1086 1087 switch(draw->drawType) 1088 { 1089 case DRAW_POINTLIST: 1090 { 1091 unsigned int index = start; 1092 1093 for(unsigned int i = 0; i < triangleCount; i++) 1094 { 1095 batch[i][0] = index; 1096 batch[i][1] = index; 1097 batch[i][2] = index; 1098 1099 index += 1; 1100 } 1101 } 1102 break; 1103 case DRAW_LINELIST: 1104 { 1105 unsigned int index = 2 * start; 1106 1107 for(unsigned int i = 0; i < triangleCount; i++) 1108 { 1109 batch[i][0] = index + 0; 1110 batch[i][1] = index + 1; 1111 batch[i][2] = index + 1; 1112 1113 index += 2; 1114 } 1115 } 1116 break; 1117 case DRAW_LINESTRIP: 1118 { 1119 unsigned int index = start; 1120 1121 for(unsigned int i = 0; i < triangleCount; i++) 1122 { 1123 batch[i][0] = index + 0; 1124 batch[i][1] = index + 1; 1125 batch[i][2] = index + 1; 1126 1127 index += 1; 1128 } 1129 } 1130 break; 1131 case DRAW_LINELOOP: 1132 { 1133 unsigned int index = start; 1134 1135 for(unsigned int i = 0; i < triangleCount; i++) 1136 { 1137 batch[i][0] = (index + 0) % loop; 1138 batch[i][1] = (index + 1) % loop; 1139 batch[i][2] = (index + 1) % loop; 1140 1141 index += 1; 1142 } 1143 } 1144 break; 1145 case DRAW_TRIANGLELIST: 1146 { 1147 unsigned int index = 3 * start; 1148 1149 for(unsigned int i = 0; i < triangleCount; i++) 1150 { 1151 batch[i][0] = index + 0; 1152 batch[i][1] = index + 1; 1153 batch[i][2] = index + 2; 1154 1155 index += 3; 1156 } 1157 } 1158 break; 1159 case DRAW_TRIANGLESTRIP: 1160 { 1161 unsigned int index = start; 1162 1163 for(unsigned int i = 0; i < triangleCount; i++) 1164 { 1165 batch[i][0] = index + 0; 1166 batch[i][1] = index + (index & 1) + 1; 1167 batch[i][2] = index + (~index & 1) + 1; 1168 1169 index += 1; 1170 } 1171 } 1172 break; 1173 case DRAW_TRIANGLEFAN: 1174 { 1175 unsigned int index = start; 1176 1177 for(unsigned int i = 0; i < triangleCount; i++) 1178 { 1179 batch[i][0] = index + 1; 1180 batch[i][1] = index + 2; 1181 batch[i][2] = 0; 1182 1183 index += 1; 1184 } 1185 } 1186 break; 1187 case DRAW_INDEXEDPOINTLIST8: 1188 { 1189 const unsigned char *index = (const unsigned char*)indices + start; 1190 1191 for(unsigned int i = 0; i < triangleCount; i++) 1192 { 1193 batch[i][0] = *index; 1194 batch[i][1] = *index; 1195 batch[i][2] = *index; 1196 1197 index += 1; 1198 } 1199 } 1200 break; 1201 case DRAW_INDEXEDPOINTLIST16: 1202 { 1203 const unsigned short *index = (const unsigned short*)indices + start; 1204 1205 for(unsigned int i = 0; i < triangleCount; i++) 1206 { 1207 batch[i][0] = *index; 1208 batch[i][1] = *index; 1209 batch[i][2] = *index; 1210 1211 index += 1; 1212 } 1213 } 1214 break; 1215 case DRAW_INDEXEDPOINTLIST32: 1216 { 1217 const unsigned int *index = (const unsigned int*)indices + start; 1218 1219 for(unsigned int i = 0; i < triangleCount; i++) 1220 { 1221 batch[i][0] = *index; 1222 batch[i][1] = *index; 1223 batch[i][2] = *index; 1224 1225 index += 1; 1226 } 1227 } 1228 break; 1229 case DRAW_INDEXEDLINELIST8: 1230 { 1231 const unsigned char *index = (const unsigned char*)indices + 2 * start; 1232 1233 for(unsigned int i = 0; i < triangleCount; i++) 1234 { 1235 batch[i][0] = index[0]; 1236 batch[i][1] = index[1]; 1237 batch[i][2] = index[1]; 1238 1239 index += 2; 1240 } 1241 } 1242 break; 1243 case DRAW_INDEXEDLINELIST16: 1244 { 1245 const unsigned short *index = (const unsigned short*)indices + 2 * start; 1246 1247 for(unsigned int i = 0; i < triangleCount; i++) 1248 { 1249 batch[i][0] = index[0]; 1250 batch[i][1] = index[1]; 1251 batch[i][2] = index[1]; 1252 1253 index += 2; 1254 } 1255 } 1256 break; 1257 case DRAW_INDEXEDLINELIST32: 1258 { 1259 const unsigned int *index = (const unsigned int*)indices + 2 * start; 1260 1261 for(unsigned int i = 0; i < triangleCount; i++) 1262 { 1263 batch[i][0] = index[0]; 1264 batch[i][1] = index[1]; 1265 batch[i][2] = index[1]; 1266 1267 index += 2; 1268 } 1269 } 1270 break; 1271 case DRAW_INDEXEDLINESTRIP8: 1272 { 1273 const unsigned char *index = (const unsigned char*)indices + start; 1274 1275 for(unsigned int i = 0; i < triangleCount; i++) 1276 { 1277 batch[i][0] = index[0]; 1278 batch[i][1] = index[1]; 1279 batch[i][2] = index[1]; 1280 1281 index += 1; 1282 } 1283 } 1284 break; 1285 case DRAW_INDEXEDLINESTRIP16: 1286 { 1287 const unsigned short *index = (const unsigned short*)indices + start; 1288 1289 for(unsigned int i = 0; i < triangleCount; i++) 1290 { 1291 batch[i][0] = index[0]; 1292 batch[i][1] = index[1]; 1293 batch[i][2] = index[1]; 1294 1295 index += 1; 1296 } 1297 } 1298 break; 1299 case DRAW_INDEXEDLINESTRIP32: 1300 { 1301 const unsigned int *index = (const unsigned int*)indices + start; 1302 1303 for(unsigned int i = 0; i < triangleCount; i++) 1304 { 1305 batch[i][0] = index[0]; 1306 batch[i][1] = index[1]; 1307 batch[i][2] = index[1]; 1308 1309 index += 1; 1310 } 1311 } 1312 break; 1313 case DRAW_INDEXEDLINELOOP8: 1314 { 1315 const unsigned char *index = (const unsigned char*)indices; 1316 1317 for(unsigned int i = 0; i < triangleCount; i++) 1318 { 1319 batch[i][0] = index[(start + i + 0) % loop]; 1320 batch[i][1] = index[(start + i + 1) % loop]; 1321 batch[i][2] = index[(start + i + 1) % loop]; 1322 } 1323 } 1324 break; 1325 case DRAW_INDEXEDLINELOOP16: 1326 { 1327 const unsigned short *index = (const unsigned short*)indices; 1328 1329 for(unsigned int i = 0; i < triangleCount; i++) 1330 { 1331 batch[i][0] = index[(start + i + 0) % loop]; 1332 batch[i][1] = index[(start + i + 1) % loop]; 1333 batch[i][2] = index[(start + i + 1) % loop]; 1334 } 1335 } 1336 break; 1337 case DRAW_INDEXEDLINELOOP32: 1338 { 1339 const unsigned int *index = (const unsigned int*)indices; 1340 1341 for(unsigned int i = 0; i < triangleCount; i++) 1342 { 1343 batch[i][0] = index[(start + i + 0) % loop]; 1344 batch[i][1] = index[(start + i + 1) % loop]; 1345 batch[i][2] = index[(start + i + 1) % loop]; 1346 } 1347 } 1348 break; 1349 case DRAW_INDEXEDTRIANGLELIST8: 1350 { 1351 const unsigned char *index = (const unsigned char*)indices + 3 * start; 1352 1353 for(unsigned int i = 0; i < triangleCount; i++) 1354 { 1355 batch[i][0] = index[0]; 1356 batch[i][1] = index[1]; 1357 batch[i][2] = index[2]; 1358 1359 index += 3; 1360 } 1361 } 1362 break; 1363 case DRAW_INDEXEDTRIANGLELIST16: 1364 { 1365 const unsigned short *index = (const unsigned short*)indices + 3 * start; 1366 1367 for(unsigned int i = 0; i < triangleCount; i++) 1368 { 1369 batch[i][0] = index[0]; 1370 batch[i][1] = index[1]; 1371 batch[i][2] = index[2]; 1372 1373 index += 3; 1374 } 1375 } 1376 break; 1377 case DRAW_INDEXEDTRIANGLELIST32: 1378 { 1379 const unsigned int *index = (const unsigned int*)indices + 3 * start; 1380 1381 for(unsigned int i = 0; i < triangleCount; i++) 1382 { 1383 batch[i][0] = index[0]; 1384 batch[i][1] = index[1]; 1385 batch[i][2] = index[2]; 1386 1387 index += 3; 1388 } 1389 } 1390 break; 1391 case DRAW_INDEXEDTRIANGLESTRIP8: 1392 { 1393 const unsigned char *index = (const unsigned char*)indices + start; 1394 1395 for(unsigned int i = 0; i < triangleCount; i++) 1396 { 1397 batch[i][0] = index[0]; 1398 batch[i][1] = index[((start + i) & 1) + 1]; 1399 batch[i][2] = index[(~(start + i) & 1) + 1]; 1400 1401 index += 1; 1402 } 1403 } 1404 break; 1405 case DRAW_INDEXEDTRIANGLESTRIP16: 1406 { 1407 const unsigned short *index = (const unsigned short*)indices + start; 1408 1409 for(unsigned int i = 0; i < triangleCount; i++) 1410 { 1411 batch[i][0] = index[0]; 1412 batch[i][1] = index[((start + i) & 1) + 1]; 1413 batch[i][2] = index[(~(start + i) & 1) + 1]; 1414 1415 index += 1; 1416 } 1417 } 1418 break; 1419 case DRAW_INDEXEDTRIANGLESTRIP32: 1420 { 1421 const unsigned int *index = (const unsigned int*)indices + start; 1422 1423 for(unsigned int i = 0; i < triangleCount; i++) 1424 { 1425 batch[i][0] = index[0]; 1426 batch[i][1] = index[((start + i) & 1) + 1]; 1427 batch[i][2] = index[(~(start + i) & 1) + 1]; 1428 1429 index += 1; 1430 } 1431 } 1432 break; 1433 case DRAW_INDEXEDTRIANGLEFAN8: 1434 { 1435 const unsigned char *index = (const unsigned char*)indices; 1436 1437 for(unsigned int i = 0; i < triangleCount; i++) 1438 { 1439 batch[i][0] = index[start + i + 1]; 1440 batch[i][1] = index[start + i + 2]; 1441 batch[i][2] = index[0]; 1442 } 1443 } 1444 break; 1445 case DRAW_INDEXEDTRIANGLEFAN16: 1446 { 1447 const unsigned short *index = (const unsigned short*)indices; 1448 1449 for(unsigned int i = 0; i < triangleCount; i++) 1450 { 1451 batch[i][0] = index[start + i + 1]; 1452 batch[i][1] = index[start + i + 2]; 1453 batch[i][2] = index[0]; 1454 } 1455 } 1456 break; 1457 case DRAW_INDEXEDTRIANGLEFAN32: 1458 { 1459 const unsigned int *index = (const unsigned int*)indices; 1460 1461 for(unsigned int i = 0; i < triangleCount; i++) 1462 { 1463 batch[i][0] = index[start + i + 1]; 1464 batch[i][1] = index[start + i + 2]; 1465 batch[i][2] = index[0]; 1466 } 1467 } 1468 break; 1469 case DRAW_QUADLIST: 1470 { 1471 unsigned int index = 4 * start / 2; 1472 1473 for(unsigned int i = 0; i < triangleCount; i += 2) 1474 { 1475 batch[i+0][0] = index + 0; 1476 batch[i+0][1] = index + 1; 1477 batch[i+0][2] = index + 2; 1478 1479 batch[i+1][0] = index + 0; 1480 batch[i+1][1] = index + 2; 1481 batch[i+1][2] = index + 3; 1482 1483 index += 4; 1484 } 1485 } 1486 break; 1487 default: 1488 ASSERT(false); 1489 return; 1490 } 1491 1492 task->primitiveStart = start; 1493 task->vertexCount = triangleCount * 3; 1494 vertexRoutine(&triangle->v0, (unsigned int*)&batch, task, data); 1495 } 1496 setupSolidTriangles(int unit,int count)1497 int Renderer::setupSolidTriangles(int unit, int count) 1498 { 1499 Triangle *triangle = triangleBatch[unit]; 1500 Primitive *primitive = primitiveBatch[unit]; 1501 1502 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1503 SetupProcessor::State &state = draw.setupState; 1504 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1505 1506 int ms = state.multiSample; 1507 int pos = state.positionRegister; 1508 const DrawData *data = draw.data; 1509 int visible = 0; 1510 1511 for(int i = 0; i < count; i++, triangle++) 1512 { 1513 Vertex &v0 = triangle->v0; 1514 Vertex &v1 = triangle->v1; 1515 Vertex &v2 = triangle->v2; 1516 1517 if((v0.clipFlags & v1.clipFlags & v2.clipFlags) == Clipper::CLIP_FINITE) 1518 { 1519 Polygon polygon(&v0.v[pos], &v1.v[pos], &v2.v[pos]); 1520 1521 int clipFlagsOr = v0.clipFlags | v1.clipFlags | v2.clipFlags | draw.clipFlags; 1522 1523 if(clipFlagsOr != Clipper::CLIP_FINITE) 1524 { 1525 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1526 { 1527 continue; 1528 } 1529 } 1530 1531 if(setupRoutine(primitive, triangle, &polygon, data)) 1532 { 1533 primitive += ms; 1534 visible++; 1535 } 1536 } 1537 } 1538 1539 return visible; 1540 } 1541 setupWireframeTriangle(int unit,int count)1542 int Renderer::setupWireframeTriangle(int unit, int count) 1543 { 1544 Triangle *triangle = triangleBatch[unit]; 1545 Primitive *primitive = primitiveBatch[unit]; 1546 int visible = 0; 1547 1548 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1549 SetupProcessor::State &state = draw.setupState; 1550 1551 const Vertex &v0 = triangle[0].v0; 1552 const Vertex &v1 = triangle[0].v1; 1553 const Vertex &v2 = triangle[0].v2; 1554 1555 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; 1556 1557 if(state.cullMode == CULL_CLOCKWISE) 1558 { 1559 if(d >= 0) return 0; 1560 } 1561 else if(state.cullMode == CULL_COUNTERCLOCKWISE) 1562 { 1563 if(d <= 0) return 0; 1564 } 1565 1566 // Copy attributes 1567 triangle[1].v0 = v1; 1568 triangle[1].v1 = v2; 1569 triangle[2].v0 = v2; 1570 triangle[2].v1 = v0; 1571 1572 if(state.color[0][0].flat) // FIXME 1573 { 1574 for(int i = 0; i < 2; i++) 1575 { 1576 triangle[1].v0.C[i] = triangle[0].v0.C[i]; 1577 triangle[1].v1.C[i] = triangle[0].v0.C[i]; 1578 triangle[2].v0.C[i] = triangle[0].v0.C[i]; 1579 triangle[2].v1.C[i] = triangle[0].v0.C[i]; 1580 } 1581 } 1582 1583 for(int i = 0; i < 3; i++) 1584 { 1585 if(setupLine(*primitive, *triangle, draw)) 1586 { 1587 primitive->area = 0.5f * d; 1588 1589 primitive++; 1590 visible++; 1591 } 1592 1593 triangle++; 1594 } 1595 1596 return visible; 1597 } 1598 setupVertexTriangle(int unit,int count)1599 int Renderer::setupVertexTriangle(int unit, int count) 1600 { 1601 Triangle *triangle = triangleBatch[unit]; 1602 Primitive *primitive = primitiveBatch[unit]; 1603 int visible = 0; 1604 1605 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1606 SetupProcessor::State &state = draw.setupState; 1607 1608 const Vertex &v0 = triangle[0].v0; 1609 const Vertex &v1 = triangle[0].v1; 1610 const Vertex &v2 = triangle[0].v2; 1611 1612 float d = (v0.y * v1.x - v0.x * v1.y) * v2.w + (v0.x * v2.y - v0.y * v2.x) * v1.w + (v2.x * v1.y - v1.x * v2.y) * v0.w; 1613 1614 if(state.cullMode == CULL_CLOCKWISE) 1615 { 1616 if(d >= 0) return 0; 1617 } 1618 else if(state.cullMode == CULL_COUNTERCLOCKWISE) 1619 { 1620 if(d <= 0) return 0; 1621 } 1622 1623 // Copy attributes 1624 triangle[1].v0 = v1; 1625 triangle[2].v0 = v2; 1626 1627 for(int i = 0; i < 3; i++) 1628 { 1629 if(setupPoint(*primitive, *triangle, draw)) 1630 { 1631 primitive->area = 0.5f * d; 1632 1633 primitive++; 1634 visible++; 1635 } 1636 1637 triangle++; 1638 } 1639 1640 return visible; 1641 } 1642 setupLines(int unit,int count)1643 int Renderer::setupLines(int unit, int count) 1644 { 1645 Triangle *triangle = triangleBatch[unit]; 1646 Primitive *primitive = primitiveBatch[unit]; 1647 int visible = 0; 1648 1649 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1650 SetupProcessor::State &state = draw.setupState; 1651 1652 int ms = state.multiSample; 1653 1654 for(int i = 0; i < count; i++) 1655 { 1656 if(setupLine(*primitive, *triangle, draw)) 1657 { 1658 primitive += ms; 1659 visible++; 1660 } 1661 1662 triangle++; 1663 } 1664 1665 return visible; 1666 } 1667 setupPoints(int unit,int count)1668 int Renderer::setupPoints(int unit, int count) 1669 { 1670 Triangle *triangle = triangleBatch[unit]; 1671 Primitive *primitive = primitiveBatch[unit]; 1672 int visible = 0; 1673 1674 DrawCall &draw = *drawList[primitiveProgress[unit].drawCall % DRAW_COUNT]; 1675 SetupProcessor::State &state = draw.setupState; 1676 1677 int ms = state.multiSample; 1678 1679 for(int i = 0; i < count; i++) 1680 { 1681 if(setupPoint(*primitive, *triangle, draw)) 1682 { 1683 primitive += ms; 1684 visible++; 1685 } 1686 1687 triangle++; 1688 } 1689 1690 return visible; 1691 } 1692 setupLine(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1693 bool Renderer::setupLine(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1694 { 1695 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1696 const SetupProcessor::State &state = draw.setupState; 1697 const DrawData &data = *draw.data; 1698 1699 float lineWidth = data.lineWidth; 1700 1701 Vertex &v0 = triangle.v0; 1702 Vertex &v1 = triangle.v1; 1703 1704 int pos = state.positionRegister; 1705 1706 const float4 &P0 = v0.v[pos]; 1707 const float4 &P1 = v1.v[pos]; 1708 1709 if(P0.w <= 0 && P1.w <= 0) 1710 { 1711 return false; 1712 } 1713 1714 const float W = data.Wx16[0] * (1.0f / 16.0f); 1715 const float H = data.Hx16[0] * (1.0f / 16.0f); 1716 1717 float dx = W * (P1.x / P1.w - P0.x / P0.w); 1718 float dy = H * (P1.y / P1.w - P0.y / P0.w); 1719 1720 if(dx == 0 && dy == 0) 1721 { 1722 return false; 1723 } 1724 1725 if(false) // Rectangle 1726 { 1727 float4 P[4]; 1728 int C[4]; 1729 1730 P[0] = P0; 1731 P[1] = P1; 1732 P[2] = P1; 1733 P[3] = P0; 1734 1735 float scale = lineWidth * 0.5f / sqrt(dx*dx + dy*dy); 1736 1737 dx *= scale; 1738 dy *= scale; 1739 1740 float dx0w = dx * P0.w / W; 1741 float dy0h = dy * P0.w / H; 1742 float dx0h = dx * P0.w / H; 1743 float dy0w = dy * P0.w / W; 1744 1745 float dx1w = dx * P1.w / W; 1746 float dy1h = dy * P1.w / H; 1747 float dx1h = dx * P1.w / H; 1748 float dy1w = dy * P1.w / W; 1749 1750 P[0].x += -dy0w + -dx0w; 1751 P[0].y += -dx0h + +dy0h; 1752 C[0] = clipper->computeClipFlags(P[0]); 1753 1754 P[1].x += -dy1w + +dx1w; 1755 P[1].y += -dx1h + +dy1h; 1756 C[1] = clipper->computeClipFlags(P[1]); 1757 1758 P[2].x += +dy1w + +dx1w; 1759 P[2].y += +dx1h + -dy1h; 1760 C[2] = clipper->computeClipFlags(P[2]); 1761 1762 P[3].x += +dy0w + -dx0w; 1763 P[3].y += +dx0h + +dy0h; 1764 C[3] = clipper->computeClipFlags(P[3]); 1765 1766 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1767 { 1768 Polygon polygon(P, 4); 1769 1770 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1771 1772 if(clipFlagsOr != Clipper::CLIP_FINITE) 1773 { 1774 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1775 { 1776 return false; 1777 } 1778 } 1779 1780 return setupRoutine(&primitive, &triangle, &polygon, &data); 1781 } 1782 } 1783 else // Diamond test convention 1784 { 1785 float4 P[8]; 1786 int C[8]; 1787 1788 P[0] = P0; 1789 P[1] = P0; 1790 P[2] = P0; 1791 P[3] = P0; 1792 P[4] = P1; 1793 P[5] = P1; 1794 P[6] = P1; 1795 P[7] = P1; 1796 1797 float dx0 = lineWidth * 0.5f * P0.w / W; 1798 float dy0 = lineWidth * 0.5f * P0.w / H; 1799 1800 float dx1 = lineWidth * 0.5f * P1.w / W; 1801 float dy1 = lineWidth * 0.5f * P1.w / H; 1802 1803 P[0].x += -dx0; 1804 C[0] = clipper->computeClipFlags(P[0]); 1805 1806 P[1].y += +dy0; 1807 C[1] = clipper->computeClipFlags(P[1]); 1808 1809 P[2].x += +dx0; 1810 C[2] = clipper->computeClipFlags(P[2]); 1811 1812 P[3].y += -dy0; 1813 C[3] = clipper->computeClipFlags(P[3]); 1814 1815 P[4].x += -dx1; 1816 C[4] = clipper->computeClipFlags(P[4]); 1817 1818 P[5].y += +dy1; 1819 C[5] = clipper->computeClipFlags(P[5]); 1820 1821 P[6].x += +dx1; 1822 C[6] = clipper->computeClipFlags(P[6]); 1823 1824 P[7].y += -dy1; 1825 C[7] = clipper->computeClipFlags(P[7]); 1826 1827 if((C[0] & C[1] & C[2] & C[3] & C[4] & C[5] & C[6] & C[7]) == Clipper::CLIP_FINITE) 1828 { 1829 float4 L[6]; 1830 1831 if(dx > -dy) 1832 { 1833 if(dx > dy) // Right 1834 { 1835 L[0] = P[0]; 1836 L[1] = P[1]; 1837 L[2] = P[5]; 1838 L[3] = P[6]; 1839 L[4] = P[7]; 1840 L[5] = P[3]; 1841 } 1842 else // Down 1843 { 1844 L[0] = P[0]; 1845 L[1] = P[4]; 1846 L[2] = P[5]; 1847 L[3] = P[6]; 1848 L[4] = P[2]; 1849 L[5] = P[3]; 1850 } 1851 } 1852 else 1853 { 1854 if(dx > dy) // Up 1855 { 1856 L[0] = P[0]; 1857 L[1] = P[1]; 1858 L[2] = P[2]; 1859 L[3] = P[6]; 1860 L[4] = P[7]; 1861 L[5] = P[4]; 1862 } 1863 else // Left 1864 { 1865 L[0] = P[1]; 1866 L[1] = P[2]; 1867 L[2] = P[3]; 1868 L[3] = P[7]; 1869 L[4] = P[4]; 1870 L[5] = P[5]; 1871 } 1872 } 1873 1874 Polygon polygon(L, 6); 1875 1876 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | C[4] | C[5] | C[6] | C[7] | draw.clipFlags; 1877 1878 if(clipFlagsOr != Clipper::CLIP_FINITE) 1879 { 1880 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1881 { 1882 return false; 1883 } 1884 } 1885 1886 return setupRoutine(&primitive, &triangle, &polygon, &data); 1887 } 1888 } 1889 1890 return false; 1891 } 1892 setupPoint(Primitive & primitive,Triangle & triangle,const DrawCall & draw)1893 bool Renderer::setupPoint(Primitive &primitive, Triangle &triangle, const DrawCall &draw) 1894 { 1895 const SetupProcessor::RoutinePointer &setupRoutine = draw.setupPointer; 1896 const SetupProcessor::State &state = draw.setupState; 1897 const DrawData &data = *draw.data; 1898 1899 Vertex &v = triangle.v0; 1900 1901 float pSize; 1902 1903 int pts = state.pointSizeRegister; 1904 1905 if(state.pointSizeRegister != Unused) 1906 { 1907 pSize = v.v[pts].y; 1908 } 1909 else 1910 { 1911 pSize = data.point.pointSize[0]; 1912 } 1913 1914 pSize = clamp(pSize, data.point.pointSizeMin, data.point.pointSizeMax); 1915 1916 float4 P[4]; 1917 int C[4]; 1918 1919 int pos = state.positionRegister; 1920 1921 P[0] = v.v[pos]; 1922 P[1] = v.v[pos]; 1923 P[2] = v.v[pos]; 1924 P[3] = v.v[pos]; 1925 1926 const float X = pSize * P[0].w * data.halfPixelX[0]; 1927 const float Y = pSize * P[0].w * data.halfPixelY[0]; 1928 1929 P[0].x -= X; 1930 P[0].y += Y; 1931 C[0] = clipper->computeClipFlags(P[0]); 1932 1933 P[1].x += X; 1934 P[1].y += Y; 1935 C[1] = clipper->computeClipFlags(P[1]); 1936 1937 P[2].x += X; 1938 P[2].y -= Y; 1939 C[2] = clipper->computeClipFlags(P[2]); 1940 1941 P[3].x -= X; 1942 P[3].y -= Y; 1943 C[3] = clipper->computeClipFlags(P[3]); 1944 1945 triangle.v1 = triangle.v0; 1946 triangle.v2 = triangle.v0; 1947 1948 triangle.v1.X += iround(16 * 0.5f * pSize); 1949 triangle.v2.Y -= iround(16 * 0.5f * pSize) * (data.Hx16[0] > 0.0f ? 1 : -1); // Both Direct3D and OpenGL expect (0, 0) in the top-left corner 1950 1951 Polygon polygon(P, 4); 1952 1953 if((C[0] & C[1] & C[2] & C[3]) == Clipper::CLIP_FINITE) 1954 { 1955 int clipFlagsOr = C[0] | C[1] | C[2] | C[3] | draw.clipFlags; 1956 1957 if(clipFlagsOr != Clipper::CLIP_FINITE) 1958 { 1959 if(!clipper->clip(polygon, clipFlagsOr, draw)) 1960 { 1961 return false; 1962 } 1963 } 1964 1965 return setupRoutine(&primitive, &triangle, &polygon, &data); 1966 } 1967 1968 return false; 1969 } 1970 initializeThreads()1971 void Renderer::initializeThreads() 1972 { 1973 unitCount = ceilPow2(threadCount); 1974 clusterCount = ceilPow2(threadCount); 1975 1976 for(int i = 0; i < unitCount; i++) 1977 { 1978 triangleBatch[i] = (Triangle*)allocate(batchSize * sizeof(Triangle)); 1979 primitiveBatch[i] = (Primitive*)allocate(batchSize * sizeof(Primitive)); 1980 } 1981 1982 for(int i = 0; i < threadCount; i++) 1983 { 1984 vertexTask[i] = (VertexTask*)allocate(sizeof(VertexTask)); 1985 vertexTask[i]->vertexCache.drawCall = -1; 1986 1987 task[i].type = Task::SUSPEND; 1988 1989 resume[i] = new Event(); 1990 suspend[i] = new Event(); 1991 1992 Parameters parameters; 1993 parameters.threadIndex = i; 1994 parameters.renderer = this; 1995 1996 exitThreads = false; 1997 worker[i] = new Thread(threadFunction, ¶meters); 1998 1999 suspend[i]->wait(); 2000 suspend[i]->signal(); 2001 } 2002 } 2003 terminateThreads()2004 void Renderer::terminateThreads() 2005 { 2006 while(threadsAwake != 0) 2007 { 2008 Thread::sleep(1); 2009 } 2010 2011 for(int thread = 0; thread < threadCount; thread++) 2012 { 2013 if(worker[thread]) 2014 { 2015 exitThreads = true; 2016 resume[thread]->signal(); 2017 worker[thread]->join(); 2018 2019 delete worker[thread]; 2020 worker[thread] = 0; 2021 delete resume[thread]; 2022 resume[thread] = 0; 2023 delete suspend[thread]; 2024 suspend[thread] = 0; 2025 } 2026 2027 deallocate(vertexTask[thread]); 2028 vertexTask[thread] = 0; 2029 } 2030 2031 for(int i = 0; i < 16; i++) 2032 { 2033 deallocate(triangleBatch[i]); 2034 triangleBatch[i] = 0; 2035 2036 deallocate(primitiveBatch[i]); 2037 primitiveBatch[i] = 0; 2038 } 2039 } 2040 loadConstants(const VertexShader * vertexShader)2041 void Renderer::loadConstants(const VertexShader *vertexShader) 2042 { 2043 if(!vertexShader) return; 2044 2045 size_t count = vertexShader->getLength(); 2046 2047 for(size_t i = 0; i < count; i++) 2048 { 2049 const Shader::Instruction *instruction = vertexShader->getInstruction(i); 2050 2051 if(instruction->opcode == Shader::OPCODE_DEF) 2052 { 2053 int index = instruction->dst.index; 2054 float value[4]; 2055 2056 value[0] = instruction->src[0].value[0]; 2057 value[1] = instruction->src[0].value[1]; 2058 value[2] = instruction->src[0].value[2]; 2059 value[3] = instruction->src[0].value[3]; 2060 2061 setVertexShaderConstantF(index, value); 2062 } 2063 else if(instruction->opcode == Shader::OPCODE_DEFI) 2064 { 2065 int index = instruction->dst.index; 2066 int integer[4]; 2067 2068 integer[0] = instruction->src[0].integer[0]; 2069 integer[1] = instruction->src[0].integer[1]; 2070 integer[2] = instruction->src[0].integer[2]; 2071 integer[3] = instruction->src[0].integer[3]; 2072 2073 setVertexShaderConstantI(index, integer); 2074 } 2075 else if(instruction->opcode == Shader::OPCODE_DEFB) 2076 { 2077 int index = instruction->dst.index; 2078 int boolean = instruction->src[0].boolean[0]; 2079 2080 setVertexShaderConstantB(index, &boolean); 2081 } 2082 } 2083 } 2084 loadConstants(const PixelShader * pixelShader)2085 void Renderer::loadConstants(const PixelShader *pixelShader) 2086 { 2087 if(!pixelShader) return; 2088 2089 size_t count = pixelShader->getLength(); 2090 2091 for(size_t i = 0; i < count; i++) 2092 { 2093 const Shader::Instruction *instruction = pixelShader->getInstruction(i); 2094 2095 if(instruction->opcode == Shader::OPCODE_DEF) 2096 { 2097 int index = instruction->dst.index; 2098 float value[4]; 2099 2100 value[0] = instruction->src[0].value[0]; 2101 value[1] = instruction->src[0].value[1]; 2102 value[2] = instruction->src[0].value[2]; 2103 value[3] = instruction->src[0].value[3]; 2104 2105 setPixelShaderConstantF(index, value); 2106 } 2107 else if(instruction->opcode == Shader::OPCODE_DEFI) 2108 { 2109 int index = instruction->dst.index; 2110 int integer[4]; 2111 2112 integer[0] = instruction->src[0].integer[0]; 2113 integer[1] = instruction->src[0].integer[1]; 2114 integer[2] = instruction->src[0].integer[2]; 2115 integer[3] = instruction->src[0].integer[3]; 2116 2117 setPixelShaderConstantI(index, integer); 2118 } 2119 else if(instruction->opcode == Shader::OPCODE_DEFB) 2120 { 2121 int index = instruction->dst.index; 2122 int boolean = instruction->src[0].boolean[0]; 2123 2124 setPixelShaderConstantB(index, &boolean); 2125 } 2126 } 2127 } 2128 setIndexBuffer(Resource * indexBuffer)2129 void Renderer::setIndexBuffer(Resource *indexBuffer) 2130 { 2131 context->indexBuffer = indexBuffer; 2132 } 2133 setMultiSampleMask(unsigned int mask)2134 void Renderer::setMultiSampleMask(unsigned int mask) 2135 { 2136 context->sampleMask = mask; 2137 } 2138 setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing)2139 void Renderer::setTransparencyAntialiasing(TransparencyAntialiasing transparencyAntialiasing) 2140 { 2141 sw::transparencyAntialiasing = transparencyAntialiasing; 2142 } 2143 isReadWriteTexture(int sampler)2144 bool Renderer::isReadWriteTexture(int sampler) 2145 { 2146 for(int index = 0; index < RENDERTARGETS; index++) 2147 { 2148 if(context->renderTarget[index] && context->texture[sampler] == context->renderTarget[index]->getResource()) 2149 { 2150 return true; 2151 } 2152 } 2153 2154 if(context->depthBuffer && context->texture[sampler] == context->depthBuffer->getResource()) 2155 { 2156 return true; 2157 } 2158 2159 return false; 2160 } 2161 updateClipper()2162 void Renderer::updateClipper() 2163 { 2164 if(updateClipPlanes) 2165 { 2166 if(VertexProcessor::isFixedFunction()) // User plane in world space 2167 { 2168 const Matrix &scissorWorld = getViewTransform(); 2169 2170 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = scissorWorld * userPlane[0]; 2171 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = scissorWorld * userPlane[1]; 2172 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = scissorWorld * userPlane[2]; 2173 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = scissorWorld * userPlane[3]; 2174 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = scissorWorld * userPlane[4]; 2175 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = scissorWorld * userPlane[5]; 2176 } 2177 else // User plane in clip space 2178 { 2179 if(clipFlags & Clipper::CLIP_PLANE0) clipPlane[0] = userPlane[0]; 2180 if(clipFlags & Clipper::CLIP_PLANE1) clipPlane[1] = userPlane[1]; 2181 if(clipFlags & Clipper::CLIP_PLANE2) clipPlane[2] = userPlane[2]; 2182 if(clipFlags & Clipper::CLIP_PLANE3) clipPlane[3] = userPlane[3]; 2183 if(clipFlags & Clipper::CLIP_PLANE4) clipPlane[4] = userPlane[4]; 2184 if(clipFlags & Clipper::CLIP_PLANE5) clipPlane[5] = userPlane[5]; 2185 } 2186 2187 updateClipPlanes = false; 2188 } 2189 } 2190 setTextureResource(unsigned int sampler,Resource * resource)2191 void Renderer::setTextureResource(unsigned int sampler, Resource *resource) 2192 { 2193 ASSERT(sampler < TOTAL_IMAGE_UNITS); 2194 2195 context->texture[sampler] = resource; 2196 } 2197 setTextureLevel(unsigned int sampler,unsigned int face,unsigned int level,Surface * surface,TextureType type)2198 void Renderer::setTextureLevel(unsigned int sampler, unsigned int face, unsigned int level, Surface *surface, TextureType type) 2199 { 2200 ASSERT(sampler < TOTAL_IMAGE_UNITS && face < 6 && level < MIPMAP_LEVELS); 2201 2202 context->sampler[sampler].setTextureLevel(face, level, surface, type); 2203 } 2204 setTextureFilter(SamplerType type,int sampler,FilterType textureFilter)2205 void Renderer::setTextureFilter(SamplerType type, int sampler, FilterType textureFilter) 2206 { 2207 if(type == SAMPLER_PIXEL) 2208 { 2209 PixelProcessor::setTextureFilter(sampler, textureFilter); 2210 } 2211 else 2212 { 2213 VertexProcessor::setTextureFilter(sampler, textureFilter); 2214 } 2215 } 2216 setMipmapFilter(SamplerType type,int sampler,MipmapType mipmapFilter)2217 void Renderer::setMipmapFilter(SamplerType type, int sampler, MipmapType mipmapFilter) 2218 { 2219 if(type == SAMPLER_PIXEL) 2220 { 2221 PixelProcessor::setMipmapFilter(sampler, mipmapFilter); 2222 } 2223 else 2224 { 2225 VertexProcessor::setMipmapFilter(sampler, mipmapFilter); 2226 } 2227 } 2228 setGatherEnable(SamplerType type,int sampler,bool enable)2229 void Renderer::setGatherEnable(SamplerType type, int sampler, bool enable) 2230 { 2231 if(type == SAMPLER_PIXEL) 2232 { 2233 PixelProcessor::setGatherEnable(sampler, enable); 2234 } 2235 else 2236 { 2237 VertexProcessor::setGatherEnable(sampler, enable); 2238 } 2239 } 2240 setAddressingModeU(SamplerType type,int sampler,AddressingMode addressMode)2241 void Renderer::setAddressingModeU(SamplerType type, int sampler, AddressingMode addressMode) 2242 { 2243 if(type == SAMPLER_PIXEL) 2244 { 2245 PixelProcessor::setAddressingModeU(sampler, addressMode); 2246 } 2247 else 2248 { 2249 VertexProcessor::setAddressingModeU(sampler, addressMode); 2250 } 2251 } 2252 setAddressingModeV(SamplerType type,int sampler,AddressingMode addressMode)2253 void Renderer::setAddressingModeV(SamplerType type, int sampler, AddressingMode addressMode) 2254 { 2255 if(type == SAMPLER_PIXEL) 2256 { 2257 PixelProcessor::setAddressingModeV(sampler, addressMode); 2258 } 2259 else 2260 { 2261 VertexProcessor::setAddressingModeV(sampler, addressMode); 2262 } 2263 } 2264 setAddressingModeW(SamplerType type,int sampler,AddressingMode addressMode)2265 void Renderer::setAddressingModeW(SamplerType type, int sampler, AddressingMode addressMode) 2266 { 2267 if(type == SAMPLER_PIXEL) 2268 { 2269 PixelProcessor::setAddressingModeW(sampler, addressMode); 2270 } 2271 else 2272 { 2273 VertexProcessor::setAddressingModeW(sampler, addressMode); 2274 } 2275 } 2276 setReadSRGB(SamplerType type,int sampler,bool sRGB)2277 void Renderer::setReadSRGB(SamplerType type, int sampler, bool sRGB) 2278 { 2279 if(type == SAMPLER_PIXEL) 2280 { 2281 PixelProcessor::setReadSRGB(sampler, sRGB); 2282 } 2283 else 2284 { 2285 VertexProcessor::setReadSRGB(sampler, sRGB); 2286 } 2287 } 2288 setMipmapLOD(SamplerType type,int sampler,float bias)2289 void Renderer::setMipmapLOD(SamplerType type, int sampler, float bias) 2290 { 2291 if(type == SAMPLER_PIXEL) 2292 { 2293 PixelProcessor::setMipmapLOD(sampler, bias); 2294 } 2295 else 2296 { 2297 VertexProcessor::setMipmapLOD(sampler, bias); 2298 } 2299 } 2300 setBorderColor(SamplerType type,int sampler,const Color<float> & borderColor)2301 void Renderer::setBorderColor(SamplerType type, int sampler, const Color<float> &borderColor) 2302 { 2303 if(type == SAMPLER_PIXEL) 2304 { 2305 PixelProcessor::setBorderColor(sampler, borderColor); 2306 } 2307 else 2308 { 2309 VertexProcessor::setBorderColor(sampler, borderColor); 2310 } 2311 } 2312 setMaxAnisotropy(SamplerType type,int sampler,float maxAnisotropy)2313 void Renderer::setMaxAnisotropy(SamplerType type, int sampler, float maxAnisotropy) 2314 { 2315 if(type == SAMPLER_PIXEL) 2316 { 2317 PixelProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2318 } 2319 else 2320 { 2321 VertexProcessor::setMaxAnisotropy(sampler, maxAnisotropy); 2322 } 2323 } 2324 setHighPrecisionFiltering(SamplerType type,int sampler,bool highPrecisionFiltering)2325 void Renderer::setHighPrecisionFiltering(SamplerType type, int sampler, bool highPrecisionFiltering) 2326 { 2327 if(type == SAMPLER_PIXEL) 2328 { 2329 PixelProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2330 } 2331 else 2332 { 2333 VertexProcessor::setHighPrecisionFiltering(sampler, highPrecisionFiltering); 2334 } 2335 } 2336 setSwizzleR(SamplerType type,int sampler,SwizzleType swizzleR)2337 void Renderer::setSwizzleR(SamplerType type, int sampler, SwizzleType swizzleR) 2338 { 2339 if(type == SAMPLER_PIXEL) 2340 { 2341 PixelProcessor::setSwizzleR(sampler, swizzleR); 2342 } 2343 else 2344 { 2345 VertexProcessor::setSwizzleR(sampler, swizzleR); 2346 } 2347 } 2348 setSwizzleG(SamplerType type,int sampler,SwizzleType swizzleG)2349 void Renderer::setSwizzleG(SamplerType type, int sampler, SwizzleType swizzleG) 2350 { 2351 if(type == SAMPLER_PIXEL) 2352 { 2353 PixelProcessor::setSwizzleG(sampler, swizzleG); 2354 } 2355 else 2356 { 2357 VertexProcessor::setSwizzleG(sampler, swizzleG); 2358 } 2359 } 2360 setSwizzleB(SamplerType type,int sampler,SwizzleType swizzleB)2361 void Renderer::setSwizzleB(SamplerType type, int sampler, SwizzleType swizzleB) 2362 { 2363 if(type == SAMPLER_PIXEL) 2364 { 2365 PixelProcessor::setSwizzleB(sampler, swizzleB); 2366 } 2367 else 2368 { 2369 VertexProcessor::setSwizzleB(sampler, swizzleB); 2370 } 2371 } 2372 setSwizzleA(SamplerType type,int sampler,SwizzleType swizzleA)2373 void Renderer::setSwizzleA(SamplerType type, int sampler, SwizzleType swizzleA) 2374 { 2375 if(type == SAMPLER_PIXEL) 2376 { 2377 PixelProcessor::setSwizzleA(sampler, swizzleA); 2378 } 2379 else 2380 { 2381 VertexProcessor::setSwizzleA(sampler, swizzleA); 2382 } 2383 } 2384 setBaseLevel(SamplerType type,int sampler,int baseLevel)2385 void Renderer::setBaseLevel(SamplerType type, int sampler, int baseLevel) 2386 { 2387 if(type == SAMPLER_PIXEL) 2388 { 2389 PixelProcessor::setBaseLevel(sampler, baseLevel); 2390 } 2391 else 2392 { 2393 VertexProcessor::setBaseLevel(sampler, baseLevel); 2394 } 2395 } 2396 setMaxLevel(SamplerType type,int sampler,int maxLevel)2397 void Renderer::setMaxLevel(SamplerType type, int sampler, int maxLevel) 2398 { 2399 if(type == SAMPLER_PIXEL) 2400 { 2401 PixelProcessor::setMaxLevel(sampler, maxLevel); 2402 } 2403 else 2404 { 2405 VertexProcessor::setMaxLevel(sampler, maxLevel); 2406 } 2407 } 2408 setMinLod(SamplerType type,int sampler,float minLod)2409 void Renderer::setMinLod(SamplerType type, int sampler, float minLod) 2410 { 2411 if(type == SAMPLER_PIXEL) 2412 { 2413 PixelProcessor::setMinLod(sampler, minLod); 2414 } 2415 else 2416 { 2417 VertexProcessor::setMinLod(sampler, minLod); 2418 } 2419 } 2420 setMaxLod(SamplerType type,int sampler,float maxLod)2421 void Renderer::setMaxLod(SamplerType type, int sampler, float maxLod) 2422 { 2423 if(type == SAMPLER_PIXEL) 2424 { 2425 PixelProcessor::setMaxLod(sampler, maxLod); 2426 } 2427 else 2428 { 2429 VertexProcessor::setMaxLod(sampler, maxLod); 2430 } 2431 } 2432 setPointSpriteEnable(bool pointSpriteEnable)2433 void Renderer::setPointSpriteEnable(bool pointSpriteEnable) 2434 { 2435 context->setPointSpriteEnable(pointSpriteEnable); 2436 } 2437 setPointScaleEnable(bool pointScaleEnable)2438 void Renderer::setPointScaleEnable(bool pointScaleEnable) 2439 { 2440 context->setPointScaleEnable(pointScaleEnable); 2441 } 2442 setLineWidth(float width)2443 void Renderer::setLineWidth(float width) 2444 { 2445 context->lineWidth = width; 2446 } 2447 setDepthBias(float bias)2448 void Renderer::setDepthBias(float bias) 2449 { 2450 depthBias = bias; 2451 } 2452 setSlopeDepthBias(float slopeBias)2453 void Renderer::setSlopeDepthBias(float slopeBias) 2454 { 2455 slopeDepthBias = slopeBias; 2456 } 2457 setRasterizerDiscard(bool rasterizerDiscard)2458 void Renderer::setRasterizerDiscard(bool rasterizerDiscard) 2459 { 2460 context->rasterizerDiscard = rasterizerDiscard; 2461 } 2462 setPixelShader(const PixelShader * shader)2463 void Renderer::setPixelShader(const PixelShader *shader) 2464 { 2465 context->pixelShader = shader; 2466 2467 loadConstants(shader); 2468 } 2469 setVertexShader(const VertexShader * shader)2470 void Renderer::setVertexShader(const VertexShader *shader) 2471 { 2472 context->vertexShader = shader; 2473 2474 loadConstants(shader); 2475 } 2476 setPixelShaderConstantF(unsigned int index,const float value[4],unsigned int count)2477 void Renderer::setPixelShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2478 { 2479 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2480 { 2481 if(drawCall[i]->psDirtyConstF < index + count) 2482 { 2483 drawCall[i]->psDirtyConstF = index + count; 2484 } 2485 } 2486 2487 for(unsigned int i = 0; i < count; i++) 2488 { 2489 PixelProcessor::setFloatConstant(index + i, value); 2490 value += 4; 2491 } 2492 } 2493 setPixelShaderConstantI(unsigned int index,const int value[4],unsigned int count)2494 void Renderer::setPixelShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2495 { 2496 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2497 { 2498 if(drawCall[i]->psDirtyConstI < index + count) 2499 { 2500 drawCall[i]->psDirtyConstI = index + count; 2501 } 2502 } 2503 2504 for(unsigned int i = 0; i < count; i++) 2505 { 2506 PixelProcessor::setIntegerConstant(index + i, value); 2507 value += 4; 2508 } 2509 } 2510 setPixelShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2511 void Renderer::setPixelShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2512 { 2513 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2514 { 2515 if(drawCall[i]->psDirtyConstB < index + count) 2516 { 2517 drawCall[i]->psDirtyConstB = index + count; 2518 } 2519 } 2520 2521 for(unsigned int i = 0; i < count; i++) 2522 { 2523 PixelProcessor::setBooleanConstant(index + i, *boolean); 2524 boolean++; 2525 } 2526 } 2527 setVertexShaderConstantF(unsigned int index,const float value[4],unsigned int count)2528 void Renderer::setVertexShaderConstantF(unsigned int index, const float value[4], unsigned int count) 2529 { 2530 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2531 { 2532 if(drawCall[i]->vsDirtyConstF < index + count) 2533 { 2534 drawCall[i]->vsDirtyConstF = index + count; 2535 } 2536 } 2537 2538 for(unsigned int i = 0; i < count; i++) 2539 { 2540 VertexProcessor::setFloatConstant(index + i, value); 2541 value += 4; 2542 } 2543 } 2544 setVertexShaderConstantI(unsigned int index,const int value[4],unsigned int count)2545 void Renderer::setVertexShaderConstantI(unsigned int index, const int value[4], unsigned int count) 2546 { 2547 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2548 { 2549 if(drawCall[i]->vsDirtyConstI < index + count) 2550 { 2551 drawCall[i]->vsDirtyConstI = index + count; 2552 } 2553 } 2554 2555 for(unsigned int i = 0; i < count; i++) 2556 { 2557 VertexProcessor::setIntegerConstant(index + i, value); 2558 value += 4; 2559 } 2560 } 2561 setVertexShaderConstantB(unsigned int index,const int * boolean,unsigned int count)2562 void Renderer::setVertexShaderConstantB(unsigned int index, const int *boolean, unsigned int count) 2563 { 2564 for(unsigned int i = 0; i < DRAW_COUNT; i++) 2565 { 2566 if(drawCall[i]->vsDirtyConstB < index + count) 2567 { 2568 drawCall[i]->vsDirtyConstB = index + count; 2569 } 2570 } 2571 2572 for(unsigned int i = 0; i < count; i++) 2573 { 2574 VertexProcessor::setBooleanConstant(index + i, *boolean); 2575 boolean++; 2576 } 2577 } 2578 setModelMatrix(const Matrix & M,int i)2579 void Renderer::setModelMatrix(const Matrix &M, int i) 2580 { 2581 VertexProcessor::setModelMatrix(M, i); 2582 } 2583 setViewMatrix(const Matrix & V)2584 void Renderer::setViewMatrix(const Matrix &V) 2585 { 2586 VertexProcessor::setViewMatrix(V); 2587 updateClipPlanes = true; 2588 } 2589 setBaseMatrix(const Matrix & B)2590 void Renderer::setBaseMatrix(const Matrix &B) 2591 { 2592 VertexProcessor::setBaseMatrix(B); 2593 updateClipPlanes = true; 2594 } 2595 setProjectionMatrix(const Matrix & P)2596 void Renderer::setProjectionMatrix(const Matrix &P) 2597 { 2598 VertexProcessor::setProjectionMatrix(P); 2599 updateClipPlanes = true; 2600 } 2601 addQuery(Query * query)2602 void Renderer::addQuery(Query *query) 2603 { 2604 queries.push_back(query); 2605 } 2606 removeQuery(Query * query)2607 void Renderer::removeQuery(Query *query) 2608 { 2609 queries.remove(query); 2610 } 2611 2612 #if PERF_HUD getThreadCount()2613 int Renderer::getThreadCount() 2614 { 2615 return threadCount; 2616 } 2617 getVertexTime(int thread)2618 int64_t Renderer::getVertexTime(int thread) 2619 { 2620 return vertexTime[thread]; 2621 } 2622 getSetupTime(int thread)2623 int64_t Renderer::getSetupTime(int thread) 2624 { 2625 return setupTime[thread]; 2626 } 2627 getPixelTime(int thread)2628 int64_t Renderer::getPixelTime(int thread) 2629 { 2630 return pixelTime[thread]; 2631 } 2632 resetTimers()2633 void Renderer::resetTimers() 2634 { 2635 for(int thread = 0; thread < threadCount; thread++) 2636 { 2637 vertexTime[thread] = 0; 2638 setupTime[thread] = 0; 2639 pixelTime[thread] = 0; 2640 } 2641 } 2642 #endif 2643 setViewport(const Viewport & viewport)2644 void Renderer::setViewport(const Viewport &viewport) 2645 { 2646 this->viewport = viewport; 2647 } 2648 setScissor(const Rect & scissor)2649 void Renderer::setScissor(const Rect &scissor) 2650 { 2651 this->scissor = scissor; 2652 } 2653 setClipFlags(int flags)2654 void Renderer::setClipFlags(int flags) 2655 { 2656 clipFlags = flags << 8; // Bottom 8 bits used by legacy frustum 2657 } 2658 setClipPlane(unsigned int index,const float plane[4])2659 void Renderer::setClipPlane(unsigned int index, const float plane[4]) 2660 { 2661 if(index < MAX_CLIP_PLANES) 2662 { 2663 userPlane[index] = plane; 2664 } 2665 else ASSERT(false); 2666 2667 updateClipPlanes = true; 2668 } 2669 updateConfiguration(bool initialUpdate)2670 void Renderer::updateConfiguration(bool initialUpdate) 2671 { 2672 bool newConfiguration = swiftConfig->hasNewConfiguration(); 2673 2674 if(newConfiguration || initialUpdate) 2675 { 2676 terminateThreads(); 2677 2678 SwiftConfig::Configuration configuration = {}; 2679 swiftConfig->getConfiguration(configuration); 2680 2681 precacheVertex = !newConfiguration && configuration.precache; 2682 precacheSetup = !newConfiguration && configuration.precache; 2683 precachePixel = !newConfiguration && configuration.precache; 2684 2685 VertexProcessor::setRoutineCacheSize(configuration.vertexRoutineCacheSize); 2686 PixelProcessor::setRoutineCacheSize(configuration.pixelRoutineCacheSize); 2687 SetupProcessor::setRoutineCacheSize(configuration.setupRoutineCacheSize); 2688 2689 switch(configuration.textureSampleQuality) 2690 { 2691 case 0: Sampler::setFilterQuality(FILTER_POINT); break; 2692 case 1: Sampler::setFilterQuality(FILTER_LINEAR); break; 2693 case 2: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2694 default: Sampler::setFilterQuality(FILTER_ANISOTROPIC); break; 2695 } 2696 2697 switch(configuration.mipmapQuality) 2698 { 2699 case 0: Sampler::setMipmapQuality(MIPMAP_POINT); break; 2700 case 1: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2701 default: Sampler::setMipmapQuality(MIPMAP_LINEAR); break; 2702 } 2703 2704 setPerspectiveCorrection(configuration.perspectiveCorrection); 2705 2706 switch(configuration.transcendentalPrecision) 2707 { 2708 case 0: 2709 logPrecision = APPROXIMATE; 2710 expPrecision = APPROXIMATE; 2711 rcpPrecision = APPROXIMATE; 2712 rsqPrecision = APPROXIMATE; 2713 break; 2714 case 1: 2715 logPrecision = PARTIAL; 2716 expPrecision = PARTIAL; 2717 rcpPrecision = PARTIAL; 2718 rsqPrecision = PARTIAL; 2719 break; 2720 case 2: 2721 logPrecision = ACCURATE; 2722 expPrecision = ACCURATE; 2723 rcpPrecision = ACCURATE; 2724 rsqPrecision = ACCURATE; 2725 break; 2726 case 3: 2727 logPrecision = WHQL; 2728 expPrecision = WHQL; 2729 rcpPrecision = WHQL; 2730 rsqPrecision = WHQL; 2731 break; 2732 case 4: 2733 logPrecision = IEEE; 2734 expPrecision = IEEE; 2735 rcpPrecision = IEEE; 2736 rsqPrecision = IEEE; 2737 break; 2738 default: 2739 logPrecision = ACCURATE; 2740 expPrecision = ACCURATE; 2741 rcpPrecision = ACCURATE; 2742 rsqPrecision = ACCURATE; 2743 break; 2744 } 2745 2746 switch(configuration.transparencyAntialiasing) 2747 { 2748 case 0: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2749 case 1: transparencyAntialiasing = TRANSPARENCY_ALPHA_TO_COVERAGE; break; 2750 default: transparencyAntialiasing = TRANSPARENCY_NONE; break; 2751 } 2752 2753 switch(configuration.threadCount) 2754 { 2755 case -1: threadCount = CPUID::coreCount(); break; 2756 case 0: threadCount = CPUID::processAffinity(); break; 2757 default: threadCount = configuration.threadCount; break; 2758 } 2759 2760 CPUID::setEnableSSE4_1(configuration.enableSSE4_1); 2761 CPUID::setEnableSSSE3(configuration.enableSSSE3); 2762 CPUID::setEnableSSE3(configuration.enableSSE3); 2763 CPUID::setEnableSSE2(configuration.enableSSE2); 2764 CPUID::setEnableSSE(configuration.enableSSE); 2765 2766 for(int pass = 0; pass < 10; pass++) 2767 { 2768 optimization[pass] = configuration.optimization[pass]; 2769 } 2770 2771 forceWindowed = configuration.forceWindowed; 2772 complementaryDepthBuffer = configuration.complementaryDepthBuffer; 2773 postBlendSRGB = configuration.postBlendSRGB; 2774 exactColorRounding = configuration.exactColorRounding; 2775 forceClearRegisters = configuration.forceClearRegisters; 2776 2777 #ifndef NDEBUG 2778 minPrimitives = configuration.minPrimitives; 2779 maxPrimitives = configuration.maxPrimitives; 2780 #endif 2781 } 2782 2783 if(!initialUpdate && !worker[0]) 2784 { 2785 initializeThreads(); 2786 } 2787 } 2788 } 2789