1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelProgram.hpp" 16 17 #include "SamplerCore.hpp" 18 #include "Renderer/Primitive.hpp" 19 #include "Renderer/Renderer.hpp" 20 21 namespace sw 22 { 23 extern bool postBlendSRGB; 24 extern bool booleanFaceRegister; 25 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 26 extern bool fullPixelPositionRegister; 27 PixelProgram(const PixelProcessor::State & state,const PixelShader * shader)28 PixelProgram::PixelProgram(const PixelProcessor::State &state, const PixelShader *shader) : 29 PixelRoutine(state, shader), 30 r(shader->indirectAddressableTemporaries), 31 aL(shader->getLimits().loops), 32 increment(shader->getLimits().loops), 33 iteration(shader->getLimits().loops), 34 callStack(shader->getLimits().stack) 35 { 36 auto limits = shader->getLimits(); 37 ifFalseBlock.resize(limits.ifs); 38 loopRepTestBlock.resize(limits.loops); 39 loopRepEndBlock.resize(limits.loops); 40 labelBlock.resize(limits.maxLabel + 1); 41 isConditionalIf.resize(limits.ifs); 42 43 loopDepth = -1; 44 enableStack[0] = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 45 46 if(shader->containsBreakInstruction()) 47 { 48 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 49 } 50 51 if(shader->containsContinueInstruction()) 52 { 53 enableContinue = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 54 } 55 } 56 setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w)57 void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) 58 { 59 if(shader->getShaderModel() >= 0x0300) 60 { 61 if(shader->isVPosDeclared()) 62 { 63 if(!halfIntegerCoordinates) 64 { 65 vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1); 66 vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1); 67 } 68 else 69 { 70 vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f); 71 vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f); 72 } 73 74 if(fullPixelPositionRegister) 75 { 76 vPos.z = z[0]; // FIXME: Centroid? 77 vPos.w = w; // FIXME: Centroid? 78 } 79 } 80 81 if(shader->isVFaceDeclared()) 82 { 83 Float4 face = *Pointer<Float>(primitive + OFFSET(Primitive, area)); 84 85 if(booleanFaceRegister) 86 { 87 face = As<Float4>(state.frontFaceCCW ? CmpNLT(face, Float4(0.0f)) : CmpLT(face, Float4(0.0f))); 88 } 89 90 vFace.x = face; 91 vFace.y = face; 92 vFace.z = face; 93 vFace.w = face; 94 } 95 } 96 } 97 applyShader(Int cMask[4])98 void PixelProgram::applyShader(Int cMask[4]) 99 { 100 enableIndex = 0; 101 stackIndex = 0; 102 103 if(shader->containsLeaveInstruction()) 104 { 105 enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 106 } 107 108 for(int i = 0; i < RENDERTARGETS; i++) 109 { 110 if(state.targetFormat[i] != FORMAT_NULL) 111 { 112 oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f); 113 } 114 } 115 116 // Create all call site return blocks up front 117 for(size_t i = 0; i < shader->getLength(); i++) 118 { 119 const Shader::Instruction *instruction = shader->getInstruction(i); 120 Shader::Opcode opcode = instruction->opcode; 121 122 if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) 123 { 124 const Dst &dst = instruction->dst; 125 126 ASSERT(callRetBlock[dst.label].size() == dst.callSite); 127 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); 128 } 129 } 130 131 bool broadcastColor0 = true; 132 133 for(size_t i = 0; i < shader->getLength(); i++) 134 { 135 const Shader::Instruction *instruction = shader->getInstruction(i); 136 Shader::Opcode opcode = instruction->opcode; 137 138 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) 139 { 140 continue; 141 } 142 143 const Dst &dst = instruction->dst; 144 const Src &src0 = instruction->src[0]; 145 const Src &src1 = instruction->src[1]; 146 const Src &src2 = instruction->src[2]; 147 const Src &src3 = instruction->src[3]; 148 const Src &src4 = instruction->src[4]; 149 150 bool predicate = instruction->predicate; 151 Control control = instruction->control; 152 bool pp = dst.partialPrecision; 153 bool project = instruction->project; 154 bool bias = instruction->bias; 155 156 Vector4f d; 157 Vector4f s0; 158 Vector4f s1; 159 Vector4f s2; 160 Vector4f s3; 161 Vector4f s4; 162 163 if(opcode == Shader::OPCODE_TEXKILL) // Takes destination as input 164 { 165 if(dst.type == Shader::PARAMETER_TEXTURE) 166 { 167 d.x = v[2 + dst.index].x; 168 d.y = v[2 + dst.index].y; 169 d.z = v[2 + dst.index].z; 170 d.w = v[2 + dst.index].w; 171 } 172 else 173 { 174 d = r[dst.index]; 175 } 176 } 177 178 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); 179 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); 180 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); 181 if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3); 182 if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4); 183 184 switch(opcode) 185 { 186 case Shader::OPCODE_PS_2_0: break; 187 case Shader::OPCODE_PS_2_x: break; 188 case Shader::OPCODE_PS_3_0: break; 189 case Shader::OPCODE_DEF: break; 190 case Shader::OPCODE_DCL: break; 191 case Shader::OPCODE_NOP: break; 192 case Shader::OPCODE_MOV: mov(d, s0); break; 193 case Shader::OPCODE_NEG: neg(d, s0); break; 194 case Shader::OPCODE_INEG: ineg(d, s0); break; 195 case Shader::OPCODE_F2B: f2b(d, s0); break; 196 case Shader::OPCODE_B2F: b2f(d, s0); break; 197 case Shader::OPCODE_F2I: f2i(d, s0); break; 198 case Shader::OPCODE_I2F: i2f(d, s0); break; 199 case Shader::OPCODE_F2U: f2u(d, s0); break; 200 case Shader::OPCODE_U2F: u2f(d, s0); break; 201 case Shader::OPCODE_I2B: i2b(d, s0); break; 202 case Shader::OPCODE_B2I: b2i(d, s0); break; 203 case Shader::OPCODE_ADD: add(d, s0, s1); break; 204 case Shader::OPCODE_IADD: iadd(d, s0, s1); break; 205 case Shader::OPCODE_SUB: sub(d, s0, s1); break; 206 case Shader::OPCODE_ISUB: isub(d, s0, s1); break; 207 case Shader::OPCODE_MUL: mul(d, s0, s1); break; 208 case Shader::OPCODE_IMUL: imul(d, s0, s1); break; 209 case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; 210 case Shader::OPCODE_IMAD: imad(d, s0, s1, s2); break; 211 case Shader::OPCODE_DP1: dp1(d, s0, s1); break; 212 case Shader::OPCODE_DP2: dp2(d, s0, s1); break; 213 case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break; 214 case Shader::OPCODE_DP3: dp3(d, s0, s1); break; 215 case Shader::OPCODE_DP4: dp4(d, s0, s1); break; 216 case Shader::OPCODE_DET2: det2(d, s0, s1); break; 217 case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break; 218 case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break; 219 case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break; 220 case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; 221 case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break; 222 case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; 223 case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; 224 case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; 225 case Shader::OPCODE_FRC: frc(d, s0); break; 226 case Shader::OPCODE_TRUNC: trunc(d, s0); break; 227 case Shader::OPCODE_FLOOR: floor(d, s0); break; 228 case Shader::OPCODE_ROUND: round(d, s0); break; 229 case Shader::OPCODE_ROUNDEVEN: roundEven(d, s0); break; 230 case Shader::OPCODE_CEIL: ceil(d, s0); break; 231 case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; 232 case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; 233 case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; 234 case Shader::OPCODE_LOG2: log2(d, s0, pp); break; 235 case Shader::OPCODE_EXP: exp(d, s0, pp); break; 236 case Shader::OPCODE_LOG: log(d, s0, pp); break; 237 case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; 238 case Shader::OPCODE_DIV: div(d, s0, s1); break; 239 case Shader::OPCODE_IDIV: idiv(d, s0, s1); break; 240 case Shader::OPCODE_UDIV: udiv(d, s0, s1); break; 241 case Shader::OPCODE_MOD: mod(d, s0, s1); break; 242 case Shader::OPCODE_IMOD: imod(d, s0, s1); break; 243 case Shader::OPCODE_UMOD: umod(d, s0, s1); break; 244 case Shader::OPCODE_SHL: shl(d, s0, s1); break; 245 case Shader::OPCODE_ISHR: ishr(d, s0, s1); break; 246 case Shader::OPCODE_USHR: ushr(d, s0, s1); break; 247 case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; 248 case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; 249 case Shader::OPCODE_RSQ: rsq(d, s0, pp); break; 250 case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; 251 case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; 252 case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; 253 case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break; 254 case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; 255 case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; 256 case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; 257 case Shader::OPCODE_MIN: min(d, s0, s1); break; 258 case Shader::OPCODE_IMIN: imin(d, s0, s1); break; 259 case Shader::OPCODE_UMIN: umin(d, s0, s1); break; 260 case Shader::OPCODE_MAX: max(d, s0, s1); break; 261 case Shader::OPCODE_IMAX: imax(d, s0, s1); break; 262 case Shader::OPCODE_UMAX: umax(d, s0, s1); break; 263 case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; 264 case Shader::OPCODE_STEP: step(d, s0, s1); break; 265 case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; 266 case Shader::OPCODE_ISINF: isinf(d, s0); break; 267 case Shader::OPCODE_ISNAN: isnan(d, s0); break; 268 case Shader::OPCODE_FLOATBITSTOINT: 269 case Shader::OPCODE_FLOATBITSTOUINT: 270 case Shader::OPCODE_INTBITSTOFLOAT: 271 case Shader::OPCODE_UINTBITSTOFLOAT: d = s0; break; 272 case Shader::OPCODE_PACKSNORM2x16: packSnorm2x16(d, s0); break; 273 case Shader::OPCODE_PACKUNORM2x16: packUnorm2x16(d, s0); break; 274 case Shader::OPCODE_PACKHALF2x16: packHalf2x16(d, s0); break; 275 case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0); break; 276 case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0); break; 277 case Shader::OPCODE_UNPACKHALF2x16: unpackHalf2x16(d, s0); break; 278 case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; 279 case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; 280 case Shader::OPCODE_SGN: sgn(d, s0); break; 281 case Shader::OPCODE_ISGN: isgn(d, s0); break; 282 case Shader::OPCODE_CRS: crs(d, s0, s1); break; 283 case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; 284 case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break; 285 case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; 286 case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break; 287 case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; 288 case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; 289 case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; 290 case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; 291 case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; 292 case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; 293 case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; 294 case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; 295 case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; 296 case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; 297 case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; 298 case Shader::OPCODE_ABS: abs(d, s0); break; 299 case Shader::OPCODE_IABS: iabs(d, s0); break; 300 case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; 301 case Shader::OPCODE_COS: cos(d, s0, pp); break; 302 case Shader::OPCODE_SIN: sin(d, s0, pp); break; 303 case Shader::OPCODE_TAN: tan(d, s0, pp); break; 304 case Shader::OPCODE_ACOS: acos(d, s0, pp); break; 305 case Shader::OPCODE_ASIN: asin(d, s0, pp); break; 306 case Shader::OPCODE_ATAN: atan(d, s0, pp); break; 307 case Shader::OPCODE_ATAN2: atan2(d, s0, s1, pp); break; 308 case Shader::OPCODE_COSH: cosh(d, s0, pp); break; 309 case Shader::OPCODE_SINH: sinh(d, s0, pp); break; 310 case Shader::OPCODE_TANH: tanh(d, s0, pp); break; 311 case Shader::OPCODE_ACOSH: acosh(d, s0, pp); break; 312 case Shader::OPCODE_ASINH: asinh(d, s0, pp); break; 313 case Shader::OPCODE_ATANH: atanh(d, s0, pp); break; 314 case Shader::OPCODE_M4X4: M4X4(d, s0, src1); break; 315 case Shader::OPCODE_M4X3: M4X3(d, s0, src1); break; 316 case Shader::OPCODE_M3X4: M3X4(d, s0, src1); break; 317 case Shader::OPCODE_M3X3: M3X3(d, s0, src1); break; 318 case Shader::OPCODE_M3X2: M3X2(d, s0, src1); break; 319 case Shader::OPCODE_TEX: TEX(d, s0, src1, project, bias); break; 320 case Shader::OPCODE_TEXLDD: TEXGRAD(d, s0, src1, s2, s3); break; 321 case Shader::OPCODE_TEXLDL: TEXLOD(d, s0, src1, s0.w); break; 322 case Shader::OPCODE_TEXLOD: TEXLOD(d, s0, src1, s2.x); break; 323 case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; 324 case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; 325 case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2); break; 326 case Shader::OPCODE_TEXLODOFFSET: TEXLODOFFSET(d, s0, src1, s2, s3.x); break; 327 case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1, s2.x); break; 328 case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCHOFFSET(d, s0, src1, s2, s3.x); break; 329 case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; 330 case Shader::OPCODE_TEXGRADOFFSET: TEXGRADOFFSET(d, s0, src1, s2, s3, s4); break; 331 case Shader::OPCODE_TEXBIAS: TEXBIAS(d, s0, src1, s2.x); break; 332 case Shader::OPCODE_TEXOFFSETBIAS: TEXOFFSETBIAS(d, s0, src1, s2, s3.x); break; 333 case Shader::OPCODE_DISCARD: DISCARD(cMask, instruction); break; 334 case Shader::OPCODE_DFDX: DFDX(d, s0); break; 335 case Shader::OPCODE_DFDY: DFDY(d, s0); break; 336 case Shader::OPCODE_FWIDTH: FWIDTH(d, s0); break; 337 case Shader::OPCODE_BREAK: BREAK(); break; 338 case Shader::OPCODE_BREAKC: BREAKC(s0, s1, control); break; 339 case Shader::OPCODE_BREAKP: BREAKP(src0); break; 340 case Shader::OPCODE_CONTINUE: CONTINUE(); break; 341 case Shader::OPCODE_TEST: TEST(); break; 342 case Shader::OPCODE_SCALAR: SCALAR(); break; 343 case Shader::OPCODE_CALL: CALL(dst.label, dst.callSite); break; 344 case Shader::OPCODE_CALLNZ: CALLNZ(dst.label, dst.callSite, src0); break; 345 case Shader::OPCODE_ELSE: ELSE(); break; 346 case Shader::OPCODE_ENDIF: ENDIF(); break; 347 case Shader::OPCODE_ENDLOOP: ENDLOOP(); break; 348 case Shader::OPCODE_ENDREP: ENDREP(); break; 349 case Shader::OPCODE_ENDWHILE: ENDWHILE(); break; 350 case Shader::OPCODE_ENDSWITCH: ENDSWITCH(); break; 351 case Shader::OPCODE_IF: IF(src0); break; 352 case Shader::OPCODE_IFC: IFC(s0, s1, control); break; 353 case Shader::OPCODE_LABEL: LABEL(dst.index); break; 354 case Shader::OPCODE_LOOP: LOOP(src1); break; 355 case Shader::OPCODE_REP: REP(src0); break; 356 case Shader::OPCODE_WHILE: WHILE(src0); break; 357 case Shader::OPCODE_SWITCH: SWITCH(); break; 358 case Shader::OPCODE_RET: RET(); break; 359 case Shader::OPCODE_LEAVE: LEAVE(); break; 360 case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; 361 case Shader::OPCODE_ALL: all(d.x, s0); break; 362 case Shader::OPCODE_ANY: any(d.x, s0); break; 363 case Shader::OPCODE_NOT: bitwise_not(d, s0); break; 364 case Shader::OPCODE_OR: bitwise_or(d, s0, s1); break; 365 case Shader::OPCODE_XOR: bitwise_xor(d, s0, s1); break; 366 case Shader::OPCODE_AND: bitwise_and(d, s0, s1); break; 367 case Shader::OPCODE_EQ: equal(d, s0, s1); break; 368 case Shader::OPCODE_NE: notEqual(d, s0, s1); break; 369 case Shader::OPCODE_END: break; 370 default: 371 ASSERT(false); 372 } 373 374 if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP) 375 { 376 if(dst.saturate) 377 { 378 if(dst.x) d.x = Max(d.x, Float4(0.0f)); 379 if(dst.y) d.y = Max(d.y, Float4(0.0f)); 380 if(dst.z) d.z = Max(d.z, Float4(0.0f)); 381 if(dst.w) d.w = Max(d.w, Float4(0.0f)); 382 383 if(dst.x) d.x = Min(d.x, Float4(1.0f)); 384 if(dst.y) d.y = Min(d.y, Float4(1.0f)); 385 if(dst.z) d.z = Min(d.z, Float4(1.0f)); 386 if(dst.w) d.w = Min(d.w, Float4(1.0f)); 387 } 388 389 if(instruction->isPredicated()) 390 { 391 Vector4f pDst; // FIXME: Rename 392 393 switch(dst.type) 394 { 395 case Shader::PARAMETER_TEMP: 396 if(dst.rel.type == Shader::PARAMETER_VOID) 397 { 398 if(dst.x) pDst.x = r[dst.index].x; 399 if(dst.y) pDst.y = r[dst.index].y; 400 if(dst.z) pDst.z = r[dst.index].z; 401 if(dst.w) pDst.w = r[dst.index].w; 402 } 403 else if(!dst.rel.dynamic) 404 { 405 Int a = dst.index + relativeAddress(dst.rel); 406 407 if(dst.x) pDst.x = r[a].x; 408 if(dst.y) pDst.y = r[a].y; 409 if(dst.z) pDst.z = r[a].z; 410 if(dst.w) pDst.w = r[a].w; 411 } 412 else 413 { 414 Int4 a = dst.index + dynamicAddress(dst.rel); 415 416 if(dst.x) pDst.x = r[a].x; 417 if(dst.y) pDst.y = r[a].y; 418 if(dst.z) pDst.z = r[a].z; 419 if(dst.w) pDst.w = r[a].w; 420 } 421 break; 422 case Shader::PARAMETER_COLOROUT: 423 if(dst.rel.type == Shader::PARAMETER_VOID) 424 { 425 if(dst.x) pDst.x = oC[dst.index].x; 426 if(dst.y) pDst.y = oC[dst.index].y; 427 if(dst.z) pDst.z = oC[dst.index].z; 428 if(dst.w) pDst.w = oC[dst.index].w; 429 } 430 else if(!dst.rel.dynamic) 431 { 432 Int a = dst.index + relativeAddress(dst.rel); 433 434 if(dst.x) pDst.x = oC[a].x; 435 if(dst.y) pDst.y = oC[a].y; 436 if(dst.z) pDst.z = oC[a].z; 437 if(dst.w) pDst.w = oC[a].w; 438 } 439 else 440 { 441 Int4 a = dst.index + dynamicAddress(dst.rel); 442 443 if(dst.x) pDst.x = oC[a].x; 444 if(dst.y) pDst.y = oC[a].y; 445 if(dst.z) pDst.z = oC[a].z; 446 if(dst.w) pDst.w = oC[a].w; 447 } 448 break; 449 case Shader::PARAMETER_PREDICATE: 450 if(dst.x) pDst.x = p0.x; 451 if(dst.y) pDst.y = p0.y; 452 if(dst.z) pDst.z = p0.z; 453 if(dst.w) pDst.w = p0.w; 454 break; 455 case Shader::PARAMETER_DEPTHOUT: 456 pDst.x = oDepth; 457 break; 458 default: 459 ASSERT(false); 460 } 461 462 Int4 enable = enableMask(instruction); 463 464 Int4 xEnable = enable; 465 Int4 yEnable = enable; 466 Int4 zEnable = enable; 467 Int4 wEnable = enable; 468 469 if(predicate) 470 { 471 unsigned char pSwizzle = instruction->predicateSwizzle; 472 473 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03]; 474 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03]; 475 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03]; 476 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03]; 477 478 if(!instruction->predicateNot) 479 { 480 if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); 481 if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); 482 if(dst.z) zEnable = zEnable & As<Int4>(zPredicate); 483 if(dst.w) wEnable = wEnable & As<Int4>(wPredicate); 484 } 485 else 486 { 487 if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate); 488 if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate); 489 if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate); 490 if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate); 491 } 492 } 493 494 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); 495 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); 496 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); 497 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); 498 499 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); 500 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); 501 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); 502 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); 503 } 504 505 switch(dst.type) 506 { 507 case Shader::PARAMETER_TEMP: 508 if(dst.rel.type == Shader::PARAMETER_VOID) 509 { 510 if(dst.x) r[dst.index].x = d.x; 511 if(dst.y) r[dst.index].y = d.y; 512 if(dst.z) r[dst.index].z = d.z; 513 if(dst.w) r[dst.index].w = d.w; 514 } 515 else if(!dst.rel.dynamic) 516 { 517 Int a = dst.index + relativeAddress(dst.rel); 518 519 if(dst.x) r[a].x = d.x; 520 if(dst.y) r[a].y = d.y; 521 if(dst.z) r[a].z = d.z; 522 if(dst.w) r[a].w = d.w; 523 } 524 else 525 { 526 Int4 a = dst.index + dynamicAddress(dst.rel); 527 528 if(dst.x) r.scatter_x(a, d.x); 529 if(dst.y) r.scatter_y(a, d.y); 530 if(dst.z) r.scatter_z(a, d.z); 531 if(dst.w) r.scatter_w(a, d.w); 532 } 533 break; 534 case Shader::PARAMETER_COLOROUT: 535 if(dst.rel.type == Shader::PARAMETER_VOID) 536 { 537 broadcastColor0 = (dst.index == 0) && broadcastColor0; 538 539 if(dst.x) oC[dst.index].x = d.x; 540 if(dst.y) oC[dst.index].y = d.y; 541 if(dst.z) oC[dst.index].z = d.z; 542 if(dst.w) oC[dst.index].w = d.w; 543 } 544 else if(!dst.rel.dynamic) 545 { 546 broadcastColor0 = false; 547 Int a = dst.index + relativeAddress(dst.rel); 548 549 if(dst.x) oC[a].x = d.x; 550 if(dst.y) oC[a].y = d.y; 551 if(dst.z) oC[a].z = d.z; 552 if(dst.w) oC[a].w = d.w; 553 } 554 else 555 { 556 broadcastColor0 = false; 557 Int4 a = dst.index + dynamicAddress(dst.rel); 558 559 if(dst.x) oC.scatter_x(a, d.x); 560 if(dst.y) oC.scatter_y(a, d.y); 561 if(dst.z) oC.scatter_z(a, d.z); 562 if(dst.w) oC.scatter_w(a, d.w); 563 } 564 break; 565 case Shader::PARAMETER_PREDICATE: 566 if(dst.x) p0.x = d.x; 567 if(dst.y) p0.y = d.y; 568 if(dst.z) p0.z = d.z; 569 if(dst.w) p0.w = d.w; 570 break; 571 case Shader::PARAMETER_DEPTHOUT: 572 oDepth = d.x; 573 break; 574 default: 575 ASSERT(false); 576 } 577 } 578 } 579 580 if(currentLabel != -1) 581 { 582 Nucleus::setInsertBlock(returnBlock); 583 } 584 585 if(broadcastColor0) 586 { 587 for(int i = 0; i < RENDERTARGETS; i++) 588 { 589 c[i] = oC[0]; 590 } 591 } 592 else 593 { 594 for(int i = 0; i < RENDERTARGETS; i++) 595 { 596 c[i] = oC[i]; 597 } 598 } 599 600 clampColor(c); 601 602 if(state.depthOverride) 603 { 604 oDepth = Min(Max(oDepth, Float4(0.0f)), Float4(1.0f)); 605 } 606 } 607 alphaTest(Int cMask[4])608 Bool PixelProgram::alphaTest(Int cMask[4]) 609 { 610 if(!state.alphaTestActive()) 611 { 612 return true; 613 } 614 615 Int aMask; 616 617 if(state.transparencyAntialiasing == TRANSPARENCY_NONE) 618 { 619 Short4 alpha = RoundShort4(c[0].w * Float4(0x1000)); 620 621 PixelRoutine::alphaTest(aMask, alpha); 622 623 for(unsigned int q = 0; q < state.multiSample; q++) 624 { 625 cMask[q] &= aMask; 626 } 627 } 628 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 629 { 630 alphaToCoverage(cMask, c[0].w); 631 } 632 else ASSERT(false); 633 634 Int pass = cMask[0]; 635 636 for(unsigned int q = 1; q < state.multiSample; q++) 637 { 638 pass = pass | cMask[q]; 639 } 640 641 return pass != 0x0; 642 } 643 rasterOperation(Float4 & fog,Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4])644 void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) 645 { 646 for(int index = 0; index < RENDERTARGETS; index++) 647 { 648 if(!state.colorWriteActive(index)) 649 { 650 continue; 651 } 652 653 if(!postBlendSRGB && state.writeSRGB && !isSRGB(index)) 654 { 655 c[index].x = linearToSRGB(c[index].x); 656 c[index].y = linearToSRGB(c[index].y); 657 c[index].z = linearToSRGB(c[index].z); 658 } 659 660 if(index == 0) 661 { 662 fogBlend(c[index], fog); 663 } 664 665 switch(state.targetFormat[index]) 666 { 667 case FORMAT_R5G6B5: 668 case FORMAT_X8R8G8B8: 669 case FORMAT_X8B8G8R8: 670 case FORMAT_A8R8G8B8: 671 case FORMAT_A8B8G8R8: 672 case FORMAT_SRGB8_X8: 673 case FORMAT_SRGB8_A8: 674 case FORMAT_G8R8: 675 case FORMAT_R8: 676 case FORMAT_A8: 677 case FORMAT_G16R16: 678 case FORMAT_A16B16G16R16: 679 for(unsigned int q = 0; q < state.multiSample; q++) 680 { 681 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 682 Vector4s color; 683 684 if(state.targetFormat[index] == FORMAT_R5G6B5) 685 { 686 color.x = UShort4(c[index].x * Float4(0xFBFF), false); 687 color.y = UShort4(c[index].y * Float4(0xFDFF), false); 688 color.z = UShort4(c[index].z * Float4(0xFBFF), false); 689 color.w = UShort4(c[index].w * Float4(0xFFFF), false); 690 } 691 else 692 { 693 color.x = convertFixed16(c[index].x, false); 694 color.y = convertFixed16(c[index].y, false); 695 color.z = convertFixed16(c[index].z, false); 696 color.w = convertFixed16(c[index].w, false); 697 } 698 699 if(state.multiSampleMask & (1 << q)) 700 { 701 alphaBlend(index, buffer, color, x); 702 logicOperation(index, buffer, color, x); 703 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 704 } 705 } 706 break; 707 case FORMAT_R32F: 708 case FORMAT_G32R32F: 709 case FORMAT_X32B32G32R32F: 710 case FORMAT_A32B32G32R32F: 711 case FORMAT_X32B32G32R32F_UNSIGNED: 712 case FORMAT_R32I: 713 case FORMAT_G32R32I: 714 case FORMAT_A32B32G32R32I: 715 case FORMAT_R32UI: 716 case FORMAT_G32R32UI: 717 case FORMAT_A32B32G32R32UI: 718 case FORMAT_R16I: 719 case FORMAT_G16R16I: 720 case FORMAT_A16B16G16R16I: 721 case FORMAT_R16UI: 722 case FORMAT_G16R16UI: 723 case FORMAT_A16B16G16R16UI: 724 case FORMAT_R8I: 725 case FORMAT_G8R8I: 726 case FORMAT_A8B8G8R8I: 727 case FORMAT_R8UI: 728 case FORMAT_G8R8UI: 729 case FORMAT_A8B8G8R8UI: 730 for(unsigned int q = 0; q < state.multiSample; q++) 731 { 732 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 733 Vector4f color = c[index]; 734 735 if(state.multiSampleMask & (1 << q)) 736 { 737 alphaBlend(index, buffer, color, x); 738 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 739 } 740 } 741 break; 742 default: 743 ASSERT(false); 744 } 745 } 746 } 747 sampleTexture(const Src & sampler,Vector4f & uvwq,Float4 & bias,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)748 Vector4f PixelProgram::sampleTexture(const Src &sampler, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 749 { 750 Vector4f tmp; 751 752 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID) 753 { 754 tmp = sampleTexture(sampler.index, uvwq, bias, dsx, dsy, offset, function); 755 } 756 else 757 { 758 Int index = As<Int>(Float(fetchRegister(sampler).x.x)); 759 760 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) 761 { 762 if(shader->usesSampler(i)) 763 { 764 If(index == i) 765 { 766 tmp = sampleTexture(i, uvwq, bias, dsx, dsy, offset, function); 767 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture 768 } 769 } 770 } 771 } 772 773 Vector4f c; 774 c.x = tmp[(sampler.swizzle >> 0) & 0x3]; 775 c.y = tmp[(sampler.swizzle >> 2) & 0x3]; 776 c.z = tmp[(sampler.swizzle >> 4) & 0x3]; 777 c.w = tmp[(sampler.swizzle >> 6) & 0x3]; 778 779 return c; 780 } 781 sampleTexture(int samplerIndex,Vector4f & uvwq,Float4 & bias,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)782 Vector4f PixelProgram::sampleTexture(int samplerIndex, Vector4f &uvwq, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 783 { 784 #if PERF_PROFILE 785 Long texTime = Ticks(); 786 #endif 787 788 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture); 789 Vector4f c = SamplerCore(constants, state.sampler[samplerIndex]).sampleTexture(texture, uvwq.x, uvwq.y, uvwq.z, uvwq.w, bias, dsx, dsy, offset, function); 790 791 #if PERF_PROFILE 792 cycles[PERF_TEX] += Ticks() - texTime; 793 #endif 794 795 return c; 796 } 797 clampColor(Vector4f oC[RENDERTARGETS])798 void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS]) 799 { 800 for(int index = 0; index < RENDERTARGETS; index++) 801 { 802 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive())) 803 { 804 continue; 805 } 806 807 switch(state.targetFormat[index]) 808 { 809 case FORMAT_NULL: 810 break; 811 case FORMAT_R5G6B5: 812 case FORMAT_A8R8G8B8: 813 case FORMAT_A8B8G8R8: 814 case FORMAT_X8R8G8B8: 815 case FORMAT_X8B8G8R8: 816 case FORMAT_SRGB8_X8: 817 case FORMAT_SRGB8_A8: 818 case FORMAT_G8R8: 819 case FORMAT_R8: 820 case FORMAT_A8: 821 case FORMAT_G16R16: 822 case FORMAT_A16B16G16R16: 823 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); 824 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); 825 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); 826 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); 827 break; 828 case FORMAT_R32F: 829 case FORMAT_G32R32F: 830 case FORMAT_X32B32G32R32F: 831 case FORMAT_A32B32G32R32F: 832 case FORMAT_R32I: 833 case FORMAT_G32R32I: 834 case FORMAT_A32B32G32R32I: 835 case FORMAT_R32UI: 836 case FORMAT_G32R32UI: 837 case FORMAT_A32B32G32R32UI: 838 case FORMAT_R16I: 839 case FORMAT_G16R16I: 840 case FORMAT_A16B16G16R16I: 841 case FORMAT_R16UI: 842 case FORMAT_G16R16UI: 843 case FORMAT_A16B16G16R16UI: 844 case FORMAT_R8I: 845 case FORMAT_G8R8I: 846 case FORMAT_A8B8G8R8I: 847 case FORMAT_R8UI: 848 case FORMAT_G8R8UI: 849 case FORMAT_A8B8G8R8UI: 850 break; 851 case FORMAT_X32B32G32R32F_UNSIGNED: 852 oC[index].x = Max(oC[index].x, Float4(0.0f)); 853 oC[index].y = Max(oC[index].y, Float4(0.0f)); 854 oC[index].z = Max(oC[index].z, Float4(0.0f)); 855 oC[index].w = Max(oC[index].w, Float4(0.0f)); 856 break; 857 default: 858 ASSERT(false); 859 } 860 } 861 } 862 enableMask(const Shader::Instruction * instruction)863 Int4 PixelProgram::enableMask(const Shader::Instruction *instruction) 864 { 865 if(scalar) 866 { 867 return Int4(0xFFFFFFFF); 868 } 869 870 Int4 enable = instruction->analysisBranch ? Int4(enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]) : Int4(0xFFFFFFFF); 871 872 if(shader->containsBreakInstruction() && instruction->analysisBreak) 873 { 874 enable &= enableBreak; 875 } 876 877 if(shader->containsContinueInstruction() && instruction->analysisContinue) 878 { 879 enable &= enableContinue; 880 } 881 882 if(shader->containsLeaveInstruction() && instruction->analysisLeave) 883 { 884 enable &= enableLeave; 885 } 886 887 return enable; 888 } 889 fetchRegister(const Src & src,unsigned int offset)890 Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset) 891 { 892 Vector4f reg; 893 unsigned int i = src.index + offset; 894 895 switch(src.type) 896 { 897 case Shader::PARAMETER_TEMP: 898 if(src.rel.type == Shader::PARAMETER_VOID) 899 { 900 reg = r[i]; 901 } 902 else if(!src.rel.dynamic) 903 { 904 reg = r[i + relativeAddress(src.rel, src.bufferIndex)]; 905 } 906 else 907 { 908 reg = r[i + dynamicAddress(src.rel)]; 909 } 910 break; 911 case Shader::PARAMETER_INPUT: 912 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 913 { 914 reg = v[i]; 915 } 916 else if(!src.rel.dynamic) 917 { 918 reg = v[i + relativeAddress(src.rel, src.bufferIndex)]; 919 } 920 else 921 { 922 reg = v[i + dynamicAddress(src.rel)]; 923 } 924 break; 925 case Shader::PARAMETER_CONST: 926 reg = readConstant(src, offset); 927 break; 928 case Shader::PARAMETER_TEXTURE: 929 reg = v[2 + i]; 930 break; 931 case Shader::PARAMETER_MISCTYPE: 932 if(src.index == Shader::VPosIndex) reg = vPos; 933 if(src.index == Shader::VFaceIndex) reg = vFace; 934 break; 935 case Shader::PARAMETER_SAMPLER: 936 if(src.rel.type == Shader::PARAMETER_VOID) 937 { 938 reg.x = As<Float4>(Int4(i)); 939 } 940 else if(src.rel.type == Shader::PARAMETER_TEMP) 941 { 942 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x)); 943 } 944 return reg; 945 case Shader::PARAMETER_PREDICATE: return reg; // Dummy 946 case Shader::PARAMETER_VOID: return reg; // Dummy 947 case Shader::PARAMETER_FLOAT4LITERAL: 948 // This is used for all literal types, and since Reactor doesn't guarantee 949 // preserving the bit pattern of float constants, we must construct them 950 // as integer constants and bitcast. 951 reg.x = As<Float4>(Int4(src.integer[0])); 952 reg.y = As<Float4>(Int4(src.integer[1])); 953 reg.z = As<Float4>(Int4(src.integer[2])); 954 reg.w = As<Float4>(Int4(src.integer[3])); 955 break; 956 case Shader::PARAMETER_CONSTINT: return reg; // Dummy 957 case Shader::PARAMETER_CONSTBOOL: return reg; // Dummy 958 case Shader::PARAMETER_LOOP: return reg; // Dummy 959 case Shader::PARAMETER_COLOROUT: 960 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 961 { 962 reg = oC[i]; 963 } 964 else if(!src.rel.dynamic) 965 { 966 reg = oC[i + relativeAddress(src.rel, src.bufferIndex)]; 967 } 968 else 969 { 970 reg = oC[i + dynamicAddress(src.rel)]; 971 } 972 break; 973 case Shader::PARAMETER_DEPTHOUT: 974 reg.x = oDepth; 975 break; 976 default: 977 ASSERT(false); 978 } 979 980 const Float4 &x = reg[(src.swizzle >> 0) & 0x3]; 981 const Float4 &y = reg[(src.swizzle >> 2) & 0x3]; 982 const Float4 &z = reg[(src.swizzle >> 4) & 0x3]; 983 const Float4 &w = reg[(src.swizzle >> 6) & 0x3]; 984 985 Vector4f mod; 986 987 switch(src.modifier) 988 { 989 case Shader::MODIFIER_NONE: 990 mod.x = x; 991 mod.y = y; 992 mod.z = z; 993 mod.w = w; 994 break; 995 case Shader::MODIFIER_NEGATE: 996 mod.x = -x; 997 mod.y = -y; 998 mod.z = -z; 999 mod.w = -w; 1000 break; 1001 case Shader::MODIFIER_ABS: 1002 mod.x = Abs(x); 1003 mod.y = Abs(y); 1004 mod.z = Abs(z); 1005 mod.w = Abs(w); 1006 break; 1007 case Shader::MODIFIER_ABS_NEGATE: 1008 mod.x = -Abs(x); 1009 mod.y = -Abs(y); 1010 mod.z = -Abs(z); 1011 mod.w = -Abs(w); 1012 break; 1013 case Shader::MODIFIER_NOT: 1014 mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF)); 1015 mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF)); 1016 mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF)); 1017 mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF)); 1018 break; 1019 default: 1020 ASSERT(false); 1021 } 1022 1023 return mod; 1024 } 1025 uniformAddress(int bufferIndex,unsigned int index)1026 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index) 1027 { 1028 if(bufferIndex == -1) 1029 { 1030 return data + OFFSET(DrawData, ps.c[index]); 1031 } 1032 else 1033 { 1034 return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index; 1035 } 1036 } 1037 uniformAddress(int bufferIndex,unsigned int index,Int & offset)1038 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset) 1039 { 1040 return uniformAddress(bufferIndex, index) + offset * sizeof(float4); 1041 } 1042 readConstant(const Src & src,unsigned int offset)1043 Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset) 1044 { 1045 Vector4f c; 1046 unsigned int i = src.index + offset; 1047 1048 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 1049 { 1050 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i)); 1051 1052 c.x = c.x.xxxx; 1053 c.y = c.y.yyyy; 1054 c.z = c.z.zzzz; 1055 c.w = c.w.wwww; 1056 1057 if(shader->containsDefineInstruction()) // Constant may be known at compile time 1058 { 1059 for(size_t j = 0; j < shader->getLength(); j++) 1060 { 1061 const Shader::Instruction &instruction = *shader->getInstruction(j); 1062 1063 if(instruction.opcode == Shader::OPCODE_DEF) 1064 { 1065 if(instruction.dst.index == i) 1066 { 1067 c.x = Float4(instruction.src[0].value[0]); 1068 c.y = Float4(instruction.src[0].value[1]); 1069 c.z = Float4(instruction.src[0].value[2]); 1070 c.w = Float4(instruction.src[0].value[3]); 1071 1072 break; 1073 } 1074 } 1075 } 1076 } 1077 } 1078 else if(!src.rel.dynamic || src.rel.type == Shader::PARAMETER_LOOP) 1079 { 1080 Int a = relativeAddress(src.rel, src.bufferIndex); 1081 1082 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a)); 1083 1084 c.x = c.x.xxxx; 1085 c.y = c.y.yyyy; 1086 c.z = c.z.zzzz; 1087 c.w = c.w.wwww; 1088 } 1089 else 1090 { 1091 int component = src.rel.swizzle & 0x03; 1092 Float4 a; 1093 1094 switch(src.rel.type) 1095 { 1096 case Shader::PARAMETER_TEMP: a = r[src.rel.index][component]; break; 1097 case Shader::PARAMETER_INPUT: a = v[src.rel.index][component]; break; 1098 case Shader::PARAMETER_OUTPUT: a = oC[src.rel.index][component]; break; 1099 case Shader::PARAMETER_CONST: a = *Pointer<Float>(uniformAddress(src.bufferIndex, src.rel.index) + component * sizeof(float)); break; 1100 case Shader::PARAMETER_MISCTYPE: 1101 switch(src.rel.index) 1102 { 1103 case Shader::VPosIndex: a = vPos.x; break; 1104 case Shader::VFaceIndex: a = vFace.x; break; 1105 default: ASSERT(false); 1106 } 1107 break; 1108 default: ASSERT(false); 1109 } 1110 1111 Int4 index = Int4(i) + As<Int4>(a) * Int4(src.rel.scale); 1112 1113 if (src.bufferIndex == -1) 1114 { 1115 index = Min(As<UInt4>(index), UInt4(VERTEX_UNIFORM_VECTORS)); // Clamp to constant register range, c[VERTEX_UNIFORM_VECTORS] = {0, 0, 0, 0} 1116 } 1117 1118 Int index0 = Extract(index, 0); 1119 Int index1 = Extract(index, 1); 1120 Int index2 = Extract(index, 2); 1121 Int index3 = Extract(index, 3); 1122 1123 c.x = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index0), 16); 1124 c.y = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index1), 16); 1125 c.z = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index2), 16); 1126 c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, 0, index3), 16); 1127 1128 transpose4x4(c.x, c.y, c.z, c.w); 1129 } 1130 1131 return c; 1132 } 1133 relativeAddress(const Shader::Relative & rel,int bufferIndex)1134 Int PixelProgram::relativeAddress(const Shader::Relative &rel, int bufferIndex) 1135 { 1136 ASSERT(!rel.dynamic); 1137 1138 if(rel.type == Shader::PARAMETER_TEMP) 1139 { 1140 return As<Int>(Extract(r[rel.index].x, 0)) * rel.scale; 1141 } 1142 else if(rel.type == Shader::PARAMETER_INPUT) 1143 { 1144 return As<Int>(Extract(v[rel.index].x, 0)) * rel.scale; 1145 } 1146 else if(rel.type == Shader::PARAMETER_OUTPUT) 1147 { 1148 return As<Int>(Extract(oC[rel.index].x, 0)) * rel.scale; 1149 } 1150 else if(rel.type == Shader::PARAMETER_CONST) 1151 { 1152 return *Pointer<Int>(uniformAddress(bufferIndex, rel.index)) * rel.scale; 1153 } 1154 else if(rel.type == Shader::PARAMETER_LOOP) 1155 { 1156 return aL[loopDepth]; 1157 } 1158 else ASSERT(false); 1159 1160 return 0; 1161 } 1162 dynamicAddress(const Shader::Relative & rel)1163 Int4 PixelProgram::dynamicAddress(const Shader::Relative &rel) 1164 { 1165 int component = rel.swizzle & 0x03; 1166 Float4 a; 1167 1168 switch(rel.type) 1169 { 1170 case Shader::PARAMETER_TEMP: a = r[rel.index][component]; break; 1171 case Shader::PARAMETER_INPUT: a = v[rel.index][component]; break; 1172 case Shader::PARAMETER_OUTPUT: a = oC[rel.index][component]; break; 1173 case Shader::PARAMETER_MISCTYPE: 1174 switch(rel.index) 1175 { 1176 case Shader::VPosIndex: a = vPos.x; break; 1177 case Shader::VFaceIndex: a = vFace.x; break; 1178 default: ASSERT(false); 1179 } 1180 break; 1181 default: ASSERT(false); 1182 } 1183 1184 return As<Int4>(a) * Int4(rel.scale); 1185 } 1186 linearToSRGB(const Float4 & x)1187 Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) 1188 { 1189 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x)); 1190 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f); 1191 1192 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f)); 1193 } 1194 M3X2(Vector4f & dst,Vector4f & src0,const Src & src1)1195 void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1) 1196 { 1197 Vector4f row0 = fetchRegister(src1, 0); 1198 Vector4f row1 = fetchRegister(src1, 1); 1199 1200 dst.x = dot3(src0, row0); 1201 dst.y = dot3(src0, row1); 1202 } 1203 M3X3(Vector4f & dst,Vector4f & src0,const Src & src1)1204 void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1) 1205 { 1206 Vector4f row0 = fetchRegister(src1, 0); 1207 Vector4f row1 = fetchRegister(src1, 1); 1208 Vector4f row2 = fetchRegister(src1, 2); 1209 1210 dst.x = dot3(src0, row0); 1211 dst.y = dot3(src0, row1); 1212 dst.z = dot3(src0, row2); 1213 } 1214 M3X4(Vector4f & dst,Vector4f & src0,const Src & src1)1215 void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1) 1216 { 1217 Vector4f row0 = fetchRegister(src1, 0); 1218 Vector4f row1 = fetchRegister(src1, 1); 1219 Vector4f row2 = fetchRegister(src1, 2); 1220 Vector4f row3 = fetchRegister(src1, 3); 1221 1222 dst.x = dot3(src0, row0); 1223 dst.y = dot3(src0, row1); 1224 dst.z = dot3(src0, row2); 1225 dst.w = dot3(src0, row3); 1226 } 1227 M4X3(Vector4f & dst,Vector4f & src0,const Src & src1)1228 void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1) 1229 { 1230 Vector4f row0 = fetchRegister(src1, 0); 1231 Vector4f row1 = fetchRegister(src1, 1); 1232 Vector4f row2 = fetchRegister(src1, 2); 1233 1234 dst.x = dot4(src0, row0); 1235 dst.y = dot4(src0, row1); 1236 dst.z = dot4(src0, row2); 1237 } 1238 M4X4(Vector4f & dst,Vector4f & src0,const Src & src1)1239 void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1) 1240 { 1241 Vector4f row0 = fetchRegister(src1, 0); 1242 Vector4f row1 = fetchRegister(src1, 1); 1243 Vector4f row2 = fetchRegister(src1, 2); 1244 Vector4f row3 = fetchRegister(src1, 3); 1245 1246 dst.x = dot4(src0, row0); 1247 dst.y = dot4(src0, row1); 1248 dst.z = dot4(src0, row2); 1249 dst.w = dot4(src0, row3); 1250 } 1251 TEX(Vector4f & dst,Vector4f & src0,const Src & src1,bool project,bool bias)1252 void PixelProgram::TEX(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) 1253 { 1254 if(project) 1255 { 1256 Vector4f proj; 1257 Float4 rw = reciprocal(src0.w); 1258 proj.x = src0.x * rw; 1259 proj.y = src0.y * rw; 1260 proj.z = src0.z * rw; 1261 1262 dst = sampleTexture(src1, proj, src0.x, (src0), (src0), (src0), Implicit); 1263 } 1264 else 1265 { 1266 dst = sampleTexture(src1, src0, src0.x, (src0), (src0), (src0), bias ? Bias : Implicit); 1267 } 1268 } 1269 TEXOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset)1270 void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset) 1271 { 1272 dst = sampleTexture(src1, src0, (src0.x), (src0), (src0), offset, {Implicit, Offset}); 1273 } 1274 TEXLODOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & lod)1275 void PixelProgram::TEXLODOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &lod) 1276 { 1277 dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Lod, Offset}); 1278 } 1279 TEXBIAS(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & bias)1280 void PixelProgram::TEXBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &bias) 1281 { 1282 dst = sampleTexture(src1, src0, bias, (src0), (src0), (src0), Bias); 1283 } 1284 TEXOFFSETBIAS(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & bias)1285 void PixelProgram::TEXOFFSETBIAS(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, Float4 &bias) 1286 { 1287 dst = sampleTexture(src1, src0, bias, (src0), (src0), offset, {Bias, Offset}); 1288 } 1289 TEXELFETCH(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & lod)1290 void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Float4 &lod) 1291 { 1292 dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Fetch); 1293 } 1294 TEXELFETCHOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,Float4 & lod)1295 void PixelProgram::TEXELFETCHOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset, Float4 &lod) 1296 { 1297 dst = sampleTexture(src1, src0, lod, (src0), (src0), offset, {Fetch, Offset}); 1298 } 1299 TEXGRAD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & dsx,Vector4f & dsy)1300 void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy) 1301 { 1302 dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, (src0), Grad); 1303 } 1304 TEXGRADOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & dsx,Vector4f & dsy,Vector4f & offset)1305 void PixelProgram::TEXGRADOFFSET(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &dsx, Vector4f &dsy, Vector4f &offset) 1306 { 1307 dst = sampleTexture(src1, src0, (src0.x), dsx, dsy, offset, {Grad, Offset}); 1308 } 1309 TEXLOD(Vector4f & dst,Vector4f & src0,const Src & src1,Float4 & lod)1310 void PixelProgram::TEXLOD(Vector4f &dst, Vector4f &src0, const Src &src1, Float4 &lod) 1311 { 1312 dst = sampleTexture(src1, src0, lod, (src0), (src0), (src0), Lod); 1313 } 1314 TEXSIZE(Vector4f & dst,Float4 & lod,const Src & src1)1315 void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1) 1316 { 1317 bool uniformSampler = (src1.type == Shader::PARAMETER_SAMPLER && src1.rel.type == Shader::PARAMETER_VOID); 1318 Int offset = uniformSampler ? src1.index * sizeof(Texture) : As<Int>(Float(fetchRegister(src1).x.x)) * sizeof(Texture); 1319 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + offset; 1320 1321 dst = SamplerCore::textureSize(texture, lod); 1322 } 1323 TEXKILL(Int cMask[4],Vector4f & src,unsigned char mask)1324 void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask) 1325 { 1326 Int kill = -1; 1327 1328 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f))); 1329 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f))); 1330 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f))); 1331 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f))); 1332 1333 // FIXME: Dynamic branching affects TEXKILL? 1334 // if(shader->containsDynamicBranching()) 1335 // { 1336 // kill = ~SignMask(enableMask()); 1337 // } 1338 1339 for(unsigned int q = 0; q < state.multiSample; q++) 1340 { 1341 cMask[q] &= kill; 1342 } 1343 1344 // FIXME: Branch to end of shader if all killed? 1345 } 1346 DISCARD(Int cMask[4],const Shader::Instruction * instruction)1347 void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction) 1348 { 1349 Int kill = 0; 1350 1351 if(shader->containsDynamicBranching()) 1352 { 1353 kill = ~SignMask(enableMask(instruction)); 1354 } 1355 1356 for(unsigned int q = 0; q < state.multiSample; q++) 1357 { 1358 cMask[q] &= kill; 1359 } 1360 1361 // FIXME: Branch to end of shader if all killed? 1362 } 1363 DFDX(Vector4f & dst,Vector4f & src)1364 void PixelProgram::DFDX(Vector4f &dst, Vector4f &src) 1365 { 1366 dst.x = src.x.yyww - src.x.xxzz; 1367 dst.y = src.y.yyww - src.y.xxzz; 1368 dst.z = src.z.yyww - src.z.xxzz; 1369 dst.w = src.w.yyww - src.w.xxzz; 1370 } 1371 DFDY(Vector4f & dst,Vector4f & src)1372 void PixelProgram::DFDY(Vector4f &dst, Vector4f &src) 1373 { 1374 dst.x = src.x.zwzw - src.x.xyxy; 1375 dst.y = src.y.zwzw - src.y.xyxy; 1376 dst.z = src.z.zwzw - src.z.xyxy; 1377 dst.w = src.w.zwzw - src.w.xyxy; 1378 } 1379 FWIDTH(Vector4f & dst,Vector4f & src)1380 void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src) 1381 { 1382 // abs(dFdx(src)) + abs(dFdy(src)); 1383 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy); 1384 dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy); 1385 dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy); 1386 dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy); 1387 } 1388 BREAK()1389 void PixelProgram::BREAK() 1390 { 1391 enableBreak = enableBreak & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1392 } 1393 BREAKC(Vector4f & src0,Vector4f & src1,Control control)1394 void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control) 1395 { 1396 Int4 condition; 1397 1398 switch(control) 1399 { 1400 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 1401 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 1402 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 1403 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 1404 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 1405 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1406 default: 1407 ASSERT(false); 1408 } 1409 1410 BREAK(condition); 1411 } 1412 BREAKP(const Src & predicateRegister)1413 void PixelProgram::BREAKP(const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC 1414 { 1415 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1416 1417 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1418 { 1419 condition = ~condition; 1420 } 1421 1422 BREAK(condition); 1423 } 1424 BREAK(Int4 & condition)1425 void PixelProgram::BREAK(Int4 &condition) 1426 { 1427 condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1428 1429 enableBreak = enableBreak & ~condition; 1430 } 1431 CONTINUE()1432 void PixelProgram::CONTINUE() 1433 { 1434 enableContinue = enableContinue & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1435 } 1436 TEST()1437 void PixelProgram::TEST() 1438 { 1439 enableContinue = restoreContinue.back(); 1440 restoreContinue.pop_back(); 1441 } 1442 SCALAR()1443 void PixelProgram::SCALAR() 1444 { 1445 scalar = true; 1446 } 1447 CALL(int labelIndex,int callSiteIndex)1448 void PixelProgram::CALL(int labelIndex, int callSiteIndex) 1449 { 1450 if(!labelBlock[labelIndex]) 1451 { 1452 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1453 } 1454 1455 if(callRetBlock[labelIndex].size() > 1) 1456 { 1457 callStack[stackIndex++] = UInt(callSiteIndex); 1458 } 1459 1460 Int4 restoreLeave = enableLeave; 1461 1462 Nucleus::createBr(labelBlock[labelIndex]); 1463 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1464 1465 enableLeave = restoreLeave; 1466 } 1467 CALLNZ(int labelIndex,int callSiteIndex,const Src & src)1468 void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src) 1469 { 1470 if(src.type == Shader::PARAMETER_CONSTBOOL) 1471 { 1472 CALLNZb(labelIndex, callSiteIndex, src); 1473 } 1474 else if(src.type == Shader::PARAMETER_PREDICATE) 1475 { 1476 CALLNZp(labelIndex, callSiteIndex, src); 1477 } 1478 else ASSERT(false); 1479 } 1480 CALLNZb(int labelIndex,int callSiteIndex,const Src & boolRegister)1481 void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister) 1482 { 1483 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME 1484 1485 if(boolRegister.modifier == Shader::MODIFIER_NOT) 1486 { 1487 condition = !condition; 1488 } 1489 1490 if(!labelBlock[labelIndex]) 1491 { 1492 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1493 } 1494 1495 if(callRetBlock[labelIndex].size() > 1) 1496 { 1497 callStack[stackIndex++] = UInt(callSiteIndex); 1498 } 1499 1500 Int4 restoreLeave = enableLeave; 1501 1502 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); 1503 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1504 1505 enableLeave = restoreLeave; 1506 } 1507 CALLNZp(int labelIndex,int callSiteIndex,const Src & predicateRegister)1508 void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister) 1509 { 1510 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1511 1512 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1513 { 1514 condition = ~condition; 1515 } 1516 1517 condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1518 1519 if(!labelBlock[labelIndex]) 1520 { 1521 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1522 } 1523 1524 if(callRetBlock[labelIndex].size() > 1) 1525 { 1526 callStack[stackIndex++] = UInt(callSiteIndex); 1527 } 1528 1529 enableIndex++; 1530 enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; 1531 Int4 restoreLeave = enableLeave; 1532 1533 Bool notAllFalse = SignMask(condition) != 0; 1534 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); 1535 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1536 1537 enableIndex--; 1538 enableLeave = restoreLeave; 1539 } 1540 ELSE()1541 void PixelProgram::ELSE() 1542 { 1543 ifDepth--; 1544 1545 BasicBlock *falseBlock = ifFalseBlock[ifDepth]; 1546 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1547 1548 if(isConditionalIf[ifDepth]) 1549 { 1550 Int4 condition = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1551 Bool notAllFalse = SignMask(condition) != 0; 1552 1553 branch(notAllFalse, falseBlock, endBlock); 1554 1555 enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] & enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1556 } 1557 else 1558 { 1559 Nucleus::createBr(endBlock); 1560 Nucleus::setInsertBlock(falseBlock); 1561 } 1562 1563 ifFalseBlock[ifDepth] = endBlock; 1564 1565 ifDepth++; 1566 } 1567 ENDIF()1568 void PixelProgram::ENDIF() 1569 { 1570 ifDepth--; 1571 1572 BasicBlock *endBlock = ifFalseBlock[ifDepth]; 1573 1574 Nucleus::createBr(endBlock); 1575 Nucleus::setInsertBlock(endBlock); 1576 1577 if(isConditionalIf[ifDepth]) 1578 { 1579 enableIndex--; 1580 } 1581 } 1582 ENDLOOP()1583 void PixelProgram::ENDLOOP() 1584 { 1585 loopRepDepth--; 1586 1587 aL[loopDepth] = aL[loopDepth] + increment[loopDepth]; // FIXME: += 1588 1589 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1590 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1591 1592 Nucleus::createBr(testBlock); 1593 Nucleus::setInsertBlock(endBlock); 1594 1595 loopDepth--; 1596 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1597 } 1598 ENDREP()1599 void PixelProgram::ENDREP() 1600 { 1601 loopRepDepth--; 1602 1603 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1604 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1605 1606 Nucleus::createBr(testBlock); 1607 Nucleus::setInsertBlock(endBlock); 1608 1609 loopDepth--; 1610 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1611 } 1612 ENDWHILE()1613 void PixelProgram::ENDWHILE() 1614 { 1615 loopRepDepth--; 1616 1617 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1618 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1619 1620 Nucleus::createBr(testBlock); 1621 Nucleus::setInsertBlock(endBlock); 1622 1623 enableIndex--; 1624 scalar = false; 1625 } 1626 ENDSWITCH()1627 void PixelProgram::ENDSWITCH() 1628 { 1629 loopRepDepth--; 1630 1631 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1632 1633 Nucleus::createBr(endBlock); 1634 Nucleus::setInsertBlock(endBlock); 1635 } 1636 IF(const Src & src)1637 void PixelProgram::IF(const Src &src) 1638 { 1639 if(src.type == Shader::PARAMETER_CONSTBOOL) 1640 { 1641 IFb(src); 1642 } 1643 else if(src.type == Shader::PARAMETER_PREDICATE) 1644 { 1645 IFp(src); 1646 } 1647 else 1648 { 1649 Int4 condition = As<Int4>(fetchRegister(src).x); 1650 IF(condition); 1651 } 1652 } 1653 IFb(const Src & boolRegister)1654 void PixelProgram::IFb(const Src &boolRegister) 1655 { 1656 ASSERT(ifDepth < 24 + 4); 1657 1658 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME 1659 1660 if(boolRegister.modifier == Shader::MODIFIER_NOT) 1661 { 1662 condition = !condition; 1663 } 1664 1665 BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1666 BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1667 1668 branch(condition, trueBlock, falseBlock); 1669 1670 isConditionalIf[ifDepth] = false; 1671 ifFalseBlock[ifDepth] = falseBlock; 1672 1673 ifDepth++; 1674 } 1675 IFp(const Src & predicateRegister)1676 void PixelProgram::IFp(const Src &predicateRegister) 1677 { 1678 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1679 1680 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1681 { 1682 condition = ~condition; 1683 } 1684 1685 IF(condition); 1686 } 1687 IFC(Vector4f & src0,Vector4f & src1,Control control)1688 void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control) 1689 { 1690 Int4 condition; 1691 1692 switch(control) 1693 { 1694 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 1695 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 1696 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 1697 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 1698 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 1699 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1700 default: 1701 ASSERT(false); 1702 } 1703 1704 IF(condition); 1705 } 1706 IF(Int4 & condition)1707 void PixelProgram::IF(Int4 &condition) 1708 { 1709 condition &= enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1710 1711 enableIndex++; 1712 enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; 1713 1714 BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1715 BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1716 1717 Bool notAllFalse = SignMask(condition) != 0; 1718 1719 branch(notAllFalse, trueBlock, falseBlock); 1720 1721 isConditionalIf[ifDepth] = true; 1722 ifFalseBlock[ifDepth] = falseBlock; 1723 1724 ifDepth++; 1725 } 1726 LABEL(int labelIndex)1727 void PixelProgram::LABEL(int labelIndex) 1728 { 1729 if(!labelBlock[labelIndex]) 1730 { 1731 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1732 } 1733 1734 Nucleus::setInsertBlock(labelBlock[labelIndex]); 1735 currentLabel = labelIndex; 1736 } 1737 LOOP(const Src & integerRegister)1738 void PixelProgram::LOOP(const Src &integerRegister) 1739 { 1740 loopDepth++; 1741 1742 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); 1743 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1])); 1744 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2])); 1745 1746 // If(increment[loopDepth] == 0) 1747 // { 1748 // increment[loopDepth] = 1; 1749 // } 1750 1751 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1752 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1753 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1754 1755 loopRepTestBlock[loopRepDepth] = testBlock; 1756 loopRepEndBlock[loopRepDepth] = endBlock; 1757 1758 // FIXME: jump(testBlock) 1759 Nucleus::createBr(testBlock); 1760 Nucleus::setInsertBlock(testBlock); 1761 1762 branch(iteration[loopDepth] > 0, loopBlock, endBlock); 1763 Nucleus::setInsertBlock(loopBlock); 1764 1765 iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- 1766 1767 loopRepDepth++; 1768 } 1769 REP(const Src & integerRegister)1770 void PixelProgram::REP(const Src &integerRegister) 1771 { 1772 loopDepth++; 1773 1774 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); 1775 aL[loopDepth] = aL[loopDepth - 1]; 1776 1777 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1778 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1779 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1780 1781 loopRepTestBlock[loopRepDepth] = testBlock; 1782 loopRepEndBlock[loopRepDepth] = endBlock; 1783 1784 // FIXME: jump(testBlock) 1785 Nucleus::createBr(testBlock); 1786 Nucleus::setInsertBlock(testBlock); 1787 1788 branch(iteration[loopDepth] > 0, loopBlock, endBlock); 1789 Nucleus::setInsertBlock(loopBlock); 1790 1791 iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- 1792 1793 loopRepDepth++; 1794 } 1795 WHILE(const Src & temporaryRegister)1796 void PixelProgram::WHILE(const Src &temporaryRegister) 1797 { 1798 enableIndex++; 1799 1800 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1801 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1802 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1803 1804 loopRepTestBlock[loopRepDepth] = testBlock; 1805 loopRepEndBlock[loopRepDepth] = endBlock; 1806 1807 Int4 restoreBreak = enableBreak; 1808 restoreContinue.push_back(enableContinue); 1809 1810 // TODO: jump(testBlock) 1811 Nucleus::createBr(testBlock); 1812 Nucleus::setInsertBlock(testBlock); 1813 1814 const Vector4f &src = fetchRegister(temporaryRegister); 1815 Int4 condition = As<Int4>(src.x); 1816 condition &= enableStack[Min(enableIndex - 1, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1817 if(shader->containsLeaveInstruction()) condition &= enableLeave; 1818 if(shader->containsBreakInstruction()) condition &= enableBreak; 1819 enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))] = condition; 1820 1821 Bool notAllFalse = SignMask(condition) != 0; 1822 branch(notAllFalse, loopBlock, endBlock); 1823 1824 Nucleus::setInsertBlock(endBlock); 1825 enableBreak = restoreBreak; 1826 1827 Nucleus::setInsertBlock(loopBlock); 1828 1829 loopRepDepth++; 1830 scalar = false; 1831 } 1832 SWITCH()1833 void PixelProgram::SWITCH() 1834 { 1835 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1836 1837 loopRepTestBlock[loopRepDepth] = nullptr; 1838 loopRepEndBlock[loopRepDepth] = endBlock; 1839 1840 Int4 restoreBreak = enableBreak; 1841 1842 BasicBlock *currentBlock = Nucleus::getInsertBlock(); 1843 1844 Nucleus::setInsertBlock(endBlock); 1845 enableBreak = restoreBreak; 1846 1847 Nucleus::setInsertBlock(currentBlock); 1848 1849 loopRepDepth++; 1850 } 1851 RET()1852 void PixelProgram::RET() 1853 { 1854 if(currentLabel == -1) 1855 { 1856 returnBlock = Nucleus::createBasicBlock(); 1857 Nucleus::createBr(returnBlock); 1858 } 1859 else 1860 { 1861 BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); 1862 1863 if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack 1864 { 1865 // FIXME: Encapsulate 1866 UInt index = callStack[--stackIndex]; 1867 1868 Value *value = index.loadValue(); 1869 SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); 1870 1871 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++) 1872 { 1873 Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]); 1874 } 1875 } 1876 else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination 1877 { 1878 Nucleus::createBr(callRetBlock[currentLabel][0]); 1879 } 1880 else // Function isn't called 1881 { 1882 Nucleus::createBr(unreachableBlock); 1883 } 1884 1885 Nucleus::setInsertBlock(unreachableBlock); 1886 Nucleus::createUnreachable(); 1887 } 1888 } 1889 LEAVE()1890 void PixelProgram::LEAVE() 1891 { 1892 enableLeave = enableLeave & ~enableStack[Min(enableIndex, Int(MAX_SHADER_ENABLE_STACK_SIZE))]; 1893 1894 // FIXME: Return from function if all instances left 1895 // FIXME: Use enableLeave in other control-flow constructs 1896 } 1897 } 1898