1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "PixelProgram.hpp" 16 #include "Primitive.hpp" 17 #include "Renderer.hpp" 18 #include "SamplerCore.hpp" 19 20 namespace sw 21 { 22 extern bool postBlendSRGB; 23 extern bool booleanFaceRegister; 24 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates 25 extern bool fullPixelPositionRegister; 26 setBuiltins(Int & x,Int & y,Float4 (& z)[4],Float4 & w)27 void PixelProgram::setBuiltins(Int &x, Int &y, Float4(&z)[4], Float4 &w) 28 { 29 if(shader->getVersion() >= 0x0300) 30 { 31 if(shader->isVPosDeclared()) 32 { 33 if(!halfIntegerCoordinates) 34 { 35 vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1); 36 vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1); 37 } 38 else 39 { 40 vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f); 41 vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f); 42 } 43 44 if(fullPixelPositionRegister) 45 { 46 vPos.z = z[0]; // FIXME: Centroid? 47 vPos.w = w; // FIXME: Centroid? 48 } 49 } 50 51 if(shader->isVFaceDeclared()) 52 { 53 Float4 area = *Pointer<Float>(primitive + OFFSET(Primitive, area)); 54 Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area; 55 56 vFace.x = face; 57 vFace.y = face; 58 vFace.z = face; 59 vFace.w = face; 60 } 61 } 62 } 63 applyShader(Int cMask[4])64 void PixelProgram::applyShader(Int cMask[4]) 65 { 66 enableIndex = 0; 67 stackIndex = 0; 68 69 if(shader->containsLeaveInstruction()) 70 { 71 enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 72 } 73 74 for(int i = 0; i < RENDERTARGETS; i++) 75 { 76 if(state.targetFormat[i] != FORMAT_NULL) 77 { 78 oC[i] = Vector4f(0.0f, 0.0f, 0.0f, 0.0f); 79 } 80 } 81 82 // Create all call site return blocks up front 83 for(size_t i = 0; i < shader->getLength(); i++) 84 { 85 const Shader::Instruction *instruction = shader->getInstruction(i); 86 Shader::Opcode opcode = instruction->opcode; 87 88 if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ) 89 { 90 const Dst &dst = instruction->dst; 91 92 ASSERT(callRetBlock[dst.label].size() == dst.callSite); 93 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock()); 94 } 95 } 96 97 bool broadcastColor0 = true; 98 99 for(size_t i = 0; i < shader->getLength(); i++) 100 { 101 const Shader::Instruction *instruction = shader->getInstruction(i); 102 Shader::Opcode opcode = instruction->opcode; 103 104 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB) 105 { 106 continue; 107 } 108 109 const Dst &dst = instruction->dst; 110 const Src &src0 = instruction->src[0]; 111 const Src &src1 = instruction->src[1]; 112 const Src &src2 = instruction->src[2]; 113 const Src &src3 = instruction->src[3]; 114 const Src &src4 = instruction->src[4]; 115 116 bool predicate = instruction->predicate; 117 Control control = instruction->control; 118 bool pp = dst.partialPrecision; 119 bool project = instruction->project; 120 bool bias = instruction->bias; 121 122 Vector4f d; 123 Vector4f s0; 124 Vector4f s1; 125 Vector4f s2; 126 Vector4f s3; 127 Vector4f s4; 128 129 if(opcode == Shader::OPCODE_TEXKILL) // Takes destination as input 130 { 131 if(dst.type == Shader::PARAMETER_TEXTURE) 132 { 133 d.x = v[2 + dst.index].x; 134 d.y = v[2 + dst.index].y; 135 d.z = v[2 + dst.index].z; 136 d.w = v[2 + dst.index].w; 137 } 138 else 139 { 140 d = r[dst.index]; 141 } 142 } 143 144 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegister(src0); 145 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegister(src1); 146 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegister(src2); 147 if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegister(src3); 148 if(src4.type != Shader::PARAMETER_VOID) s4 = fetchRegister(src4); 149 150 switch(opcode) 151 { 152 case Shader::OPCODE_PS_2_0: break; 153 case Shader::OPCODE_PS_2_x: break; 154 case Shader::OPCODE_PS_3_0: break; 155 case Shader::OPCODE_DEF: break; 156 case Shader::OPCODE_DCL: break; 157 case Shader::OPCODE_NOP: break; 158 case Shader::OPCODE_MOV: mov(d, s0); break; 159 case Shader::OPCODE_NEG: neg(d, s0); break; 160 case Shader::OPCODE_INEG: ineg(d, s0); break; 161 case Shader::OPCODE_F2B: f2b(d, s0); break; 162 case Shader::OPCODE_B2F: b2f(d, s0); break; 163 case Shader::OPCODE_F2I: f2i(d, s0); break; 164 case Shader::OPCODE_I2F: i2f(d, s0); break; 165 case Shader::OPCODE_F2U: f2u(d, s0); break; 166 case Shader::OPCODE_U2F: u2f(d, s0); break; 167 case Shader::OPCODE_I2B: i2b(d, s0); break; 168 case Shader::OPCODE_B2I: b2i(d, s0); break; 169 case Shader::OPCODE_ADD: add(d, s0, s1); break; 170 case Shader::OPCODE_IADD: iadd(d, s0, s1); break; 171 case Shader::OPCODE_SUB: sub(d, s0, s1); break; 172 case Shader::OPCODE_ISUB: isub(d, s0, s1); break; 173 case Shader::OPCODE_MUL: mul(d, s0, s1); break; 174 case Shader::OPCODE_IMUL: imul(d, s0, s1); break; 175 case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break; 176 case Shader::OPCODE_IMAD: imad(d, s0, s1, s2); break; 177 case Shader::OPCODE_DP1: dp1(d, s0, s1); break; 178 case Shader::OPCODE_DP2: dp2(d, s0, s1); break; 179 case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break; 180 case Shader::OPCODE_DP3: dp3(d, s0, s1); break; 181 case Shader::OPCODE_DP4: dp4(d, s0, s1); break; 182 case Shader::OPCODE_DET2: det2(d, s0, s1); break; 183 case Shader::OPCODE_DET3: det3(d, s0, s1, s2); break; 184 case Shader::OPCODE_DET4: det4(d, s0, s1, s2, s3); break; 185 case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break; 186 case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break; 187 case Shader::OPCODE_UCMP: ucmp(d, s0, s1, control); break; 188 case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break; 189 case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break; 190 case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break; 191 case Shader::OPCODE_FRC: frc(d, s0); break; 192 case Shader::OPCODE_TRUNC: trunc(d, s0); break; 193 case Shader::OPCODE_FLOOR: floor(d, s0); break; 194 case Shader::OPCODE_ROUND: round(d, s0); break; 195 case Shader::OPCODE_ROUNDEVEN: roundEven(d, s0); break; 196 case Shader::OPCODE_CEIL: ceil(d, s0); break; 197 case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break; 198 case Shader::OPCODE_EXP2: exp2(d, s0, pp); break; 199 case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break; 200 case Shader::OPCODE_LOG2: log2(d, s0, pp); break; 201 case Shader::OPCODE_EXP: exp(d, s0, pp); break; 202 case Shader::OPCODE_LOG: log(d, s0, pp); break; 203 case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break; 204 case Shader::OPCODE_DIV: div(d, s0, s1); break; 205 case Shader::OPCODE_IDIV: idiv(d, s0, s1); break; 206 case Shader::OPCODE_UDIV: udiv(d, s0, s1); break; 207 case Shader::OPCODE_MOD: mod(d, s0, s1); break; 208 case Shader::OPCODE_IMOD: imod(d, s0, s1); break; 209 case Shader::OPCODE_UMOD: umod(d, s0, s1); break; 210 case Shader::OPCODE_SHL: shl(d, s0, s1); break; 211 case Shader::OPCODE_ISHR: ishr(d, s0, s1); break; 212 case Shader::OPCODE_USHR: ushr(d, s0, s1); break; 213 case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break; 214 case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break; 215 case Shader::OPCODE_RSQ: rsq(d, s0, pp); break; 216 case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break; 217 case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break; 218 case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break; 219 case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break; 220 case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break; 221 case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break; 222 case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break; 223 case Shader::OPCODE_MIN: min(d, s0, s1); break; 224 case Shader::OPCODE_IMIN: imin(d, s0, s1); break; 225 case Shader::OPCODE_UMIN: umin(d, s0, s1); break; 226 case Shader::OPCODE_MAX: max(d, s0, s1); break; 227 case Shader::OPCODE_IMAX: imax(d, s0, s1); break; 228 case Shader::OPCODE_UMAX: umax(d, s0, s1); break; 229 case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break; 230 case Shader::OPCODE_STEP: step(d, s0, s1); break; 231 case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break; 232 case Shader::OPCODE_FLOATBITSTOINT: 233 case Shader::OPCODE_FLOATBITSTOUINT: 234 case Shader::OPCODE_INTBITSTOFLOAT: 235 case Shader::OPCODE_UINTBITSTOFLOAT: d = s0; break; 236 case Shader::OPCODE_PACKSNORM2x16: packSnorm2x16(d, s0); break; 237 case Shader::OPCODE_PACKUNORM2x16: packUnorm2x16(d, s0); break; 238 case Shader::OPCODE_PACKHALF2x16: packHalf2x16(d, s0); break; 239 case Shader::OPCODE_UNPACKSNORM2x16: unpackSnorm2x16(d, s0); break; 240 case Shader::OPCODE_UNPACKUNORM2x16: unpackUnorm2x16(d, s0); break; 241 case Shader::OPCODE_UNPACKHALF2x16: unpackHalf2x16(d, s0); break; 242 case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break; 243 case Shader::OPCODE_POW: pow(d, s0, s1, pp); break; 244 case Shader::OPCODE_SGN: sgn(d, s0); break; 245 case Shader::OPCODE_ISGN: isgn(d, s0); break; 246 case Shader::OPCODE_CRS: crs(d, s0, s1); break; 247 case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break; 248 case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break; 249 case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break; 250 case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break; 251 case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break; 252 case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break; 253 case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break; 254 case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break; 255 case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break; 256 case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break; 257 case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break; 258 case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break; 259 case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break; 260 case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break; 261 case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break; 262 case Shader::OPCODE_ABS: abs(d, s0); break; 263 case Shader::OPCODE_IABS: iabs(d, s0); break; 264 case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break; 265 case Shader::OPCODE_COS: cos(d, s0, pp); break; 266 case Shader::OPCODE_SIN: sin(d, s0, pp); break; 267 case Shader::OPCODE_TAN: tan(d, s0, pp); break; 268 case Shader::OPCODE_ACOS: acos(d, s0, pp); break; 269 case Shader::OPCODE_ASIN: asin(d, s0, pp); break; 270 case Shader::OPCODE_ATAN: atan(d, s0, pp); break; 271 case Shader::OPCODE_ATAN2: atan2(d, s0, s1, pp); break; 272 case Shader::OPCODE_COSH: cosh(d, s0, pp); break; 273 case Shader::OPCODE_SINH: sinh(d, s0, pp); break; 274 case Shader::OPCODE_TANH: tanh(d, s0, pp); break; 275 case Shader::OPCODE_ACOSH: acosh(d, s0, pp); break; 276 case Shader::OPCODE_ASINH: asinh(d, s0, pp); break; 277 case Shader::OPCODE_ATANH: atanh(d, s0, pp); break; 278 case Shader::OPCODE_M4X4: M4X4(d, s0, src1); break; 279 case Shader::OPCODE_M4X3: M4X3(d, s0, src1); break; 280 case Shader::OPCODE_M3X4: M3X4(d, s0, src1); break; 281 case Shader::OPCODE_M3X3: M3X3(d, s0, src1); break; 282 case Shader::OPCODE_M3X2: M3X2(d, s0, src1); break; 283 case Shader::OPCODE_TEX: TEXLD(d, s0, src1, project, bias); break; 284 case Shader::OPCODE_TEXLDD: TEXLDD(d, s0, src1, s2, s3); break; 285 case Shader::OPCODE_TEXLDL: TEXLDL(d, s0, src1); break; 286 case Shader::OPCODE_TEXSIZE: TEXSIZE(d, s0.x, src1); break; 287 case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break; 288 case Shader::OPCODE_TEXOFFSET: TEXOFFSET(d, s0, src1, s2, bias); break; 289 case Shader::OPCODE_TEXLDLOFFSET: TEXLDL(d, s0, src1, s2, bias); break; 290 case Shader::OPCODE_TEXELFETCH: TEXELFETCH(d, s0, src1); break; 291 case Shader::OPCODE_TEXELFETCHOFFSET: TEXELFETCH(d, s0, src1, s2); break; 292 case Shader::OPCODE_TEXGRAD: TEXGRAD(d, s0, src1, s2, s3); break; 293 case Shader::OPCODE_TEXGRADOFFSET: TEXGRAD(d, s0, src1, s2, s3, s4); break; 294 case Shader::OPCODE_DISCARD: DISCARD(cMask, instruction); break; 295 case Shader::OPCODE_DFDX: DFDX(d, s0); break; 296 case Shader::OPCODE_DFDY: DFDY(d, s0); break; 297 case Shader::OPCODE_FWIDTH: FWIDTH(d, s0); break; 298 case Shader::OPCODE_BREAK: BREAK(); break; 299 case Shader::OPCODE_BREAKC: BREAKC(s0, s1, control); break; 300 case Shader::OPCODE_BREAKP: BREAKP(src0); break; 301 case Shader::OPCODE_CONTINUE: CONTINUE(); break; 302 case Shader::OPCODE_TEST: TEST(); break; 303 case Shader::OPCODE_CALL: CALL(dst.label, dst.callSite); break; 304 case Shader::OPCODE_CALLNZ: CALLNZ(dst.label, dst.callSite, src0); break; 305 case Shader::OPCODE_ELSE: ELSE(); break; 306 case Shader::OPCODE_ENDIF: ENDIF(); break; 307 case Shader::OPCODE_ENDLOOP: ENDLOOP(); break; 308 case Shader::OPCODE_ENDREP: ENDREP(); break; 309 case Shader::OPCODE_ENDWHILE: ENDWHILE(); break; 310 case Shader::OPCODE_ENDSWITCH: ENDSWITCH(); break; 311 case Shader::OPCODE_IF: IF(src0); break; 312 case Shader::OPCODE_IFC: IFC(s0, s1, control); break; 313 case Shader::OPCODE_LABEL: LABEL(dst.index); break; 314 case Shader::OPCODE_LOOP: LOOP(src1); break; 315 case Shader::OPCODE_REP: REP(src0); break; 316 case Shader::OPCODE_WHILE: WHILE(src0); break; 317 case Shader::OPCODE_SWITCH: SWITCH(); break; 318 case Shader::OPCODE_RET: RET(); break; 319 case Shader::OPCODE_LEAVE: LEAVE(); break; 320 case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break; 321 case Shader::OPCODE_ALL: all(d.x, s0); break; 322 case Shader::OPCODE_ANY: any(d.x, s0); break; 323 case Shader::OPCODE_NOT: bitwise_not(d, s0); break; 324 case Shader::OPCODE_OR: bitwise_or(d, s0, s1); break; 325 case Shader::OPCODE_XOR: bitwise_xor(d, s0, s1); break; 326 case Shader::OPCODE_AND: bitwise_and(d, s0, s1); break; 327 case Shader::OPCODE_EQ: equal(d, s0, s1); break; 328 case Shader::OPCODE_NE: notEqual(d, s0, s1); break; 329 case Shader::OPCODE_END: break; 330 default: 331 ASSERT(false); 332 } 333 334 if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP) 335 { 336 if(dst.integer) 337 { 338 switch(opcode) 339 { 340 case Shader::OPCODE_DIV: 341 if(dst.x) d.x = Trunc(d.x); 342 if(dst.y) d.y = Trunc(d.y); 343 if(dst.z) d.z = Trunc(d.z); 344 if(dst.w) d.w = Trunc(d.w); 345 break; 346 default: 347 break; // No truncation to integer required when arguments are integer 348 } 349 } 350 351 if(dst.saturate) 352 { 353 if(dst.x) d.x = Max(d.x, Float4(0.0f)); 354 if(dst.y) d.y = Max(d.y, Float4(0.0f)); 355 if(dst.z) d.z = Max(d.z, Float4(0.0f)); 356 if(dst.w) d.w = Max(d.w, Float4(0.0f)); 357 358 if(dst.x) d.x = Min(d.x, Float4(1.0f)); 359 if(dst.y) d.y = Min(d.y, Float4(1.0f)); 360 if(dst.z) d.z = Min(d.z, Float4(1.0f)); 361 if(dst.w) d.w = Min(d.w, Float4(1.0f)); 362 } 363 364 if(instruction->isPredicated()) 365 { 366 Vector4f pDst; // FIXME: Rename 367 368 switch(dst.type) 369 { 370 case Shader::PARAMETER_TEMP: 371 if(dst.rel.type == Shader::PARAMETER_VOID) 372 { 373 if(dst.x) pDst.x = r[dst.index].x; 374 if(dst.y) pDst.y = r[dst.index].y; 375 if(dst.z) pDst.z = r[dst.index].z; 376 if(dst.w) pDst.w = r[dst.index].w; 377 } 378 else 379 { 380 Int a = relativeAddress(dst); 381 382 if(dst.x) pDst.x = r[dst.index + a].x; 383 if(dst.y) pDst.y = r[dst.index + a].y; 384 if(dst.z) pDst.z = r[dst.index + a].z; 385 if(dst.w) pDst.w = r[dst.index + a].w; 386 } 387 break; 388 case Shader::PARAMETER_COLOROUT: 389 if(dst.rel.type == Shader::PARAMETER_VOID) 390 { 391 if(dst.x) pDst.x = oC[dst.index].x; 392 if(dst.y) pDst.y = oC[dst.index].y; 393 if(dst.z) pDst.z = oC[dst.index].z; 394 if(dst.w) pDst.w = oC[dst.index].w; 395 } 396 else 397 { 398 Int a = relativeAddress(dst) + dst.index; 399 400 if(dst.x) pDst.x = oC[a].x; 401 if(dst.y) pDst.y = oC[a].y; 402 if(dst.z) pDst.z = oC[a].z; 403 if(dst.w) pDst.w = oC[a].w; 404 } 405 break; 406 case Shader::PARAMETER_PREDICATE: 407 if(dst.x) pDst.x = p0.x; 408 if(dst.y) pDst.y = p0.y; 409 if(dst.z) pDst.z = p0.z; 410 if(dst.w) pDst.w = p0.w; 411 break; 412 case Shader::PARAMETER_DEPTHOUT: 413 pDst.x = oDepth; 414 break; 415 default: 416 ASSERT(false); 417 } 418 419 Int4 enable = enableMask(instruction); 420 421 Int4 xEnable = enable; 422 Int4 yEnable = enable; 423 Int4 zEnable = enable; 424 Int4 wEnable = enable; 425 426 if(predicate) 427 { 428 unsigned char pSwizzle = instruction->predicateSwizzle; 429 430 Float4 xPredicate = p0[(pSwizzle >> 0) & 0x03]; 431 Float4 yPredicate = p0[(pSwizzle >> 2) & 0x03]; 432 Float4 zPredicate = p0[(pSwizzle >> 4) & 0x03]; 433 Float4 wPredicate = p0[(pSwizzle >> 6) & 0x03]; 434 435 if(!instruction->predicateNot) 436 { 437 if(dst.x) xEnable = xEnable & As<Int4>(xPredicate); 438 if(dst.y) yEnable = yEnable & As<Int4>(yPredicate); 439 if(dst.z) zEnable = zEnable & As<Int4>(zPredicate); 440 if(dst.w) wEnable = wEnable & As<Int4>(wPredicate); 441 } 442 else 443 { 444 if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate); 445 if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate); 446 if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate); 447 if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate); 448 } 449 } 450 451 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable); 452 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable); 453 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable); 454 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable); 455 456 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable)); 457 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable)); 458 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable)); 459 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable)); 460 } 461 462 switch(dst.type) 463 { 464 case Shader::PARAMETER_TEMP: 465 if(dst.rel.type == Shader::PARAMETER_VOID) 466 { 467 if(dst.x) r[dst.index].x = d.x; 468 if(dst.y) r[dst.index].y = d.y; 469 if(dst.z) r[dst.index].z = d.z; 470 if(dst.w) r[dst.index].w = d.w; 471 } 472 else 473 { 474 Int a = relativeAddress(dst); 475 476 if(dst.x) r[dst.index + a].x = d.x; 477 if(dst.y) r[dst.index + a].y = d.y; 478 if(dst.z) r[dst.index + a].z = d.z; 479 if(dst.w) r[dst.index + a].w = d.w; 480 } 481 break; 482 case Shader::PARAMETER_COLOROUT: 483 if(dst.rel.type == Shader::PARAMETER_VOID) 484 { 485 broadcastColor0 = (dst.index == 0) && broadcastColor0; 486 487 if(dst.x) { oC[dst.index].x = d.x; } 488 if(dst.y) { oC[dst.index].y = d.y; } 489 if(dst.z) { oC[dst.index].z = d.z; } 490 if(dst.w) { oC[dst.index].w = d.w; } 491 } 492 else 493 { 494 broadcastColor0 = false; 495 Int a = relativeAddress(dst) + dst.index; 496 497 if(dst.x) { oC[a].x = d.x; } 498 if(dst.y) { oC[a].y = d.y; } 499 if(dst.z) { oC[a].z = d.z; } 500 if(dst.w) { oC[a].w = d.w; } 501 } 502 break; 503 case Shader::PARAMETER_PREDICATE: 504 if(dst.x) p0.x = d.x; 505 if(dst.y) p0.y = d.y; 506 if(dst.z) p0.z = d.z; 507 if(dst.w) p0.w = d.w; 508 break; 509 case Shader::PARAMETER_DEPTHOUT: 510 oDepth = d.x; 511 break; 512 default: 513 ASSERT(false); 514 } 515 } 516 } 517 518 if(currentLabel != -1) 519 { 520 Nucleus::setInsertBlock(returnBlock); 521 } 522 523 if(broadcastColor0) 524 { 525 for(int i = 0; i < RENDERTARGETS; i++) 526 { 527 c[i] = oC[0]; 528 } 529 } 530 else 531 { 532 for(int i = 0; i < RENDERTARGETS; i++) 533 { 534 c[i] = oC[i]; 535 } 536 } 537 } 538 alphaTest(Int cMask[4])539 Bool PixelProgram::alphaTest(Int cMask[4]) 540 { 541 clampColor(c); 542 543 if(!state.alphaTestActive()) 544 { 545 return true; 546 } 547 548 Int aMask; 549 550 if(state.transparencyAntialiasing == TRANSPARENCY_NONE) 551 { 552 Short4 alpha = RoundShort4(c[0].w * Float4(0x1000)); 553 554 PixelRoutine::alphaTest(aMask, alpha); 555 556 for(unsigned int q = 0; q < state.multiSample; q++) 557 { 558 cMask[q] &= aMask; 559 } 560 } 561 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE) 562 { 563 alphaToCoverage(cMask, c[0].w); 564 } 565 else ASSERT(false); 566 567 Int pass = cMask[0]; 568 569 for(unsigned int q = 1; q < state.multiSample; q++) 570 { 571 pass = pass | cMask[q]; 572 } 573 574 return pass != 0x0; 575 } 576 rasterOperation(Float4 & fog,Pointer<Byte> cBuffer[4],Int & x,Int sMask[4],Int zMask[4],Int cMask[4])577 void PixelProgram::rasterOperation(Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4]) 578 { 579 for(int index = 0; index < RENDERTARGETS; index++) 580 { 581 if(!state.colorWriteActive(index)) 582 { 583 continue; 584 } 585 586 if(!postBlendSRGB && state.writeSRGB && !isSRGB(index)) 587 { 588 c[index].x = linearToSRGB(c[index].x); 589 c[index].y = linearToSRGB(c[index].y); 590 c[index].z = linearToSRGB(c[index].z); 591 } 592 593 if(index == 0) 594 { 595 fogBlend(c[index], fog); 596 } 597 598 switch(state.targetFormat[index]) 599 { 600 case FORMAT_R5G6B5: 601 case FORMAT_X8R8G8B8: 602 case FORMAT_X8B8G8R8: 603 case FORMAT_A8R8G8B8: 604 case FORMAT_A8B8G8R8: 605 case FORMAT_SRGB8_X8: 606 case FORMAT_SRGB8_A8: 607 case FORMAT_G8R8: 608 case FORMAT_R8: 609 case FORMAT_A8: 610 case FORMAT_G16R16: 611 case FORMAT_A16B16G16R16: 612 for(unsigned int q = 0; q < state.multiSample; q++) 613 { 614 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 615 Vector4s color; 616 617 if(state.targetFormat[index] == FORMAT_R5G6B5) 618 { 619 color.x = UShort4(c[index].x * Float4(0xFBFF), false); 620 color.y = UShort4(c[index].y * Float4(0xFDFF), false); 621 color.z = UShort4(c[index].z * Float4(0xFBFF), false); 622 color.w = UShort4(c[index].w * Float4(0xFFFF), false); 623 } 624 else 625 { 626 color.x = convertFixed16(c[index].x, false); 627 color.y = convertFixed16(c[index].y, false); 628 color.z = convertFixed16(c[index].z, false); 629 color.w = convertFixed16(c[index].w, false); 630 } 631 632 if(state.multiSampleMask & (1 << q)) 633 { 634 alphaBlend(index, buffer, color, x); 635 logicOperation(index, buffer, color, x); 636 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 637 } 638 } 639 break; 640 case FORMAT_R32F: 641 case FORMAT_G32R32F: 642 case FORMAT_X32B32G32R32F: 643 case FORMAT_A32B32G32R32F: 644 case FORMAT_R32I: 645 case FORMAT_G32R32I: 646 case FORMAT_A32B32G32R32I: 647 case FORMAT_R32UI: 648 case FORMAT_G32R32UI: 649 case FORMAT_A32B32G32R32UI: 650 case FORMAT_R16I: 651 case FORMAT_G16R16I: 652 case FORMAT_A16B16G16R16I: 653 case FORMAT_R16UI: 654 case FORMAT_G16R16UI: 655 case FORMAT_A16B16G16R16UI: 656 case FORMAT_R8I: 657 case FORMAT_G8R8I: 658 case FORMAT_A8B8G8R8I: 659 case FORMAT_R8UI: 660 case FORMAT_G8R8UI: 661 case FORMAT_A8B8G8R8UI: 662 for(unsigned int q = 0; q < state.multiSample; q++) 663 { 664 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(data + OFFSET(DrawData, colorSliceB[index])); 665 Vector4f color = c[index]; 666 667 if(state.multiSampleMask & (1 << q)) 668 { 669 alphaBlend(index, buffer, color, x); 670 writeColor(index, buffer, x, color, sMask[q], zMask[q], cMask[q]); 671 } 672 } 673 break; 674 default: 675 ASSERT(false); 676 } 677 } 678 } 679 sampleTexture(Vector4f & c,const Src & sampler,Vector4f & uvwq,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)680 void PixelProgram::sampleTexture(Vector4f &c, const Src &sampler, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 681 { 682 Vector4f tmp; 683 684 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID) 685 { 686 sampleTexture(tmp, sampler.index, uvwq, dsx, dsy, offset, function); 687 } 688 else 689 { 690 Int index = As<Int>(Float(fetchRegister(sampler).x.x)); 691 692 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++) 693 { 694 if(shader->usesSampler(i)) 695 { 696 If(index == i) 697 { 698 sampleTexture(tmp, i, uvwq, dsx, dsy, offset, function); 699 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture 700 } 701 } 702 } 703 } 704 705 c.x = tmp[(sampler.swizzle >> 0) & 0x3]; 706 c.y = tmp[(sampler.swizzle >> 2) & 0x3]; 707 c.z = tmp[(sampler.swizzle >> 4) & 0x3]; 708 c.w = tmp[(sampler.swizzle >> 6) & 0x3]; 709 } 710 sampleTexture(Vector4f & c,int samplerIndex,Vector4f & uvwq,Vector4f & dsx,Vector4f & dsy,Vector4f & offset,SamplerFunction function)711 void PixelProgram::sampleTexture(Vector4f &c, int samplerIndex, Vector4f &uvwq, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 712 { 713 #if PERF_PROFILE 714 Long texTime = Ticks(); 715 #endif 716 717 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + samplerIndex * sizeof(Texture); 718 sampler[samplerIndex]->sampleTexture(texture, c, uvwq.x, uvwq.y, uvwq.z, uvwq.w, dsx, dsy, offset, function); 719 720 #if PERF_PROFILE 721 cycles[PERF_TEX] += Ticks() - texTime; 722 #endif 723 } 724 clampColor(Vector4f oC[RENDERTARGETS])725 void PixelProgram::clampColor(Vector4f oC[RENDERTARGETS]) 726 { 727 for(int index = 0; index < RENDERTARGETS; index++) 728 { 729 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive())) 730 { 731 continue; 732 } 733 734 switch(state.targetFormat[index]) 735 { 736 case FORMAT_NULL: 737 break; 738 case FORMAT_R5G6B5: 739 case FORMAT_A8R8G8B8: 740 case FORMAT_A8B8G8R8: 741 case FORMAT_X8R8G8B8: 742 case FORMAT_X8B8G8R8: 743 case FORMAT_SRGB8_X8: 744 case FORMAT_SRGB8_A8: 745 case FORMAT_G8R8: 746 case FORMAT_R8: 747 case FORMAT_A8: 748 case FORMAT_G16R16: 749 case FORMAT_A16B16G16R16: 750 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f)); 751 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f)); 752 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f)); 753 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f)); 754 break; 755 case FORMAT_R32F: 756 case FORMAT_G32R32F: 757 case FORMAT_X32B32G32R32F: 758 case FORMAT_A32B32G32R32F: 759 case FORMAT_R32I: 760 case FORMAT_G32R32I: 761 case FORMAT_A32B32G32R32I: 762 case FORMAT_R32UI: 763 case FORMAT_G32R32UI: 764 case FORMAT_A32B32G32R32UI: 765 case FORMAT_R16I: 766 case FORMAT_G16R16I: 767 case FORMAT_A16B16G16R16I: 768 case FORMAT_R16UI: 769 case FORMAT_G16R16UI: 770 case FORMAT_A16B16G16R16UI: 771 case FORMAT_R8I: 772 case FORMAT_G8R8I: 773 case FORMAT_A8B8G8R8I: 774 case FORMAT_R8UI: 775 case FORMAT_G8R8UI: 776 case FORMAT_A8B8G8R8UI: 777 break; 778 default: 779 ASSERT(false); 780 } 781 } 782 } 783 enableMask(const Shader::Instruction * instruction)784 Int4 PixelProgram::enableMask(const Shader::Instruction *instruction) 785 { 786 Int4 enable = instruction->analysisBranch ? Int4(enableStack[enableIndex]) : Int4(0xFFFFFFFF); 787 788 if(!whileTest) 789 { 790 if(shader->containsBreakInstruction() && instruction->analysisBreak) 791 { 792 enable &= enableBreak; 793 } 794 795 if(shader->containsContinueInstruction() && instruction->analysisContinue) 796 { 797 enable &= enableContinue; 798 } 799 800 if(shader->containsLeaveInstruction() && instruction->analysisLeave) 801 { 802 enable &= enableLeave; 803 } 804 } 805 806 return enable; 807 } 808 fetchRegister(const Src & src,unsigned int offset)809 Vector4f PixelProgram::fetchRegister(const Src &src, unsigned int offset) 810 { 811 Vector4f reg; 812 unsigned int i = src.index + offset; 813 814 switch(src.type) 815 { 816 case Shader::PARAMETER_TEMP: 817 if(src.rel.type == Shader::PARAMETER_VOID) 818 { 819 reg = r[i]; 820 } 821 else 822 { 823 Int a = relativeAddress(src, src.bufferIndex); 824 825 reg = r[i + a]; 826 } 827 break; 828 case Shader::PARAMETER_INPUT: 829 { 830 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 831 { 832 reg = v[i]; 833 } 834 else 835 { 836 Int a = relativeAddress(src, src.bufferIndex); 837 838 reg = v[i + a]; 839 } 840 } 841 break; 842 case Shader::PARAMETER_CONST: 843 reg = readConstant(src, offset); 844 break; 845 case Shader::PARAMETER_TEXTURE: 846 reg = v[2 + i]; 847 break; 848 case Shader::PARAMETER_MISCTYPE: 849 if(src.index == Shader::VPosIndex) reg = vPos; 850 if(src.index == Shader::VFaceIndex) reg = vFace; 851 break; 852 case Shader::PARAMETER_SAMPLER: 853 if(src.rel.type == Shader::PARAMETER_VOID) 854 { 855 reg.x = As<Float4>(Int4(i)); 856 } 857 else if(src.rel.type == Shader::PARAMETER_TEMP) 858 { 859 reg.x = As<Float4>(Int4(i) + As<Int4>(r[src.rel.index].x)); 860 } 861 return reg; 862 case Shader::PARAMETER_PREDICATE: return reg; // Dummy 863 case Shader::PARAMETER_VOID: return reg; // Dummy 864 case Shader::PARAMETER_FLOAT4LITERAL: 865 reg.x = Float4(src.value[0]); 866 reg.y = Float4(src.value[1]); 867 reg.z = Float4(src.value[2]); 868 reg.w = Float4(src.value[3]); 869 break; 870 case Shader::PARAMETER_CONSTINT: return reg; // Dummy 871 case Shader::PARAMETER_CONSTBOOL: return reg; // Dummy 872 case Shader::PARAMETER_LOOP: return reg; // Dummy 873 case Shader::PARAMETER_COLOROUT: 874 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 875 { 876 reg = oC[i]; 877 } 878 else 879 { 880 Int a = relativeAddress(src, src.bufferIndex); 881 882 reg = oC[i + a]; 883 } 884 break; 885 case Shader::PARAMETER_DEPTHOUT: 886 reg.x = oDepth; 887 break; 888 default: 889 ASSERT(false); 890 } 891 892 const Float4 &x = reg[(src.swizzle >> 0) & 0x3]; 893 const Float4 &y = reg[(src.swizzle >> 2) & 0x3]; 894 const Float4 &z = reg[(src.swizzle >> 4) & 0x3]; 895 const Float4 &w = reg[(src.swizzle >> 6) & 0x3]; 896 897 Vector4f mod; 898 899 switch(src.modifier) 900 { 901 case Shader::MODIFIER_NONE: 902 mod.x = x; 903 mod.y = y; 904 mod.z = z; 905 mod.w = w; 906 break; 907 case Shader::MODIFIER_NEGATE: 908 mod.x = -x; 909 mod.y = -y; 910 mod.z = -z; 911 mod.w = -w; 912 break; 913 case Shader::MODIFIER_ABS: 914 mod.x = Abs(x); 915 mod.y = Abs(y); 916 mod.z = Abs(z); 917 mod.w = Abs(w); 918 break; 919 case Shader::MODIFIER_ABS_NEGATE: 920 mod.x = -Abs(x); 921 mod.y = -Abs(y); 922 mod.z = -Abs(z); 923 mod.w = -Abs(w); 924 break; 925 case Shader::MODIFIER_NOT: 926 mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF)); 927 mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF)); 928 mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF)); 929 mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF)); 930 break; 931 default: 932 ASSERT(false); 933 } 934 935 return mod; 936 } 937 uniformAddress(int bufferIndex,unsigned int index)938 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index) 939 { 940 if(bufferIndex == -1) 941 { 942 return data + OFFSET(DrawData, ps.c[index]); 943 } 944 else 945 { 946 return *Pointer<Pointer<Byte>>(data + OFFSET(DrawData, ps.u[bufferIndex])) + index; 947 } 948 } 949 uniformAddress(int bufferIndex,unsigned int index,Int & offset)950 RValue<Pointer<Byte>> PixelProgram::uniformAddress(int bufferIndex, unsigned int index, Int& offset) 951 { 952 return uniformAddress(bufferIndex, index) + offset * sizeof(float4); 953 } 954 readConstant(const Src & src,unsigned int offset)955 Vector4f PixelProgram::readConstant(const Src &src, unsigned int offset) 956 { 957 Vector4f c; 958 unsigned int i = src.index + offset; 959 960 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative 961 { 962 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i)); 963 964 c.x = c.x.xxxx; 965 c.y = c.y.yyyy; 966 c.z = c.z.zzzz; 967 c.w = c.w.wwww; 968 969 if(shader->containsDefineInstruction()) // Constant may be known at compile time 970 { 971 for(size_t j = 0; j < shader->getLength(); j++) 972 { 973 const Shader::Instruction &instruction = *shader->getInstruction(j); 974 975 if(instruction.opcode == Shader::OPCODE_DEF) 976 { 977 if(instruction.dst.index == i) 978 { 979 c.x = Float4(instruction.src[0].value[0]); 980 c.y = Float4(instruction.src[0].value[1]); 981 c.z = Float4(instruction.src[0].value[2]); 982 c.w = Float4(instruction.src[0].value[3]); 983 984 break; 985 } 986 } 987 } 988 } 989 } 990 else if(src.rel.type == Shader::PARAMETER_LOOP) 991 { 992 Int loopCounter = aL[loopDepth]; 993 994 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, loopCounter)); 995 996 c.x = c.x.xxxx; 997 c.y = c.y.yyyy; 998 c.z = c.z.zzzz; 999 c.w = c.w.wwww; 1000 } 1001 else 1002 { 1003 Int a = relativeAddress(src, src.bufferIndex); 1004 1005 c.x = c.y = c.z = c.w = *Pointer<Float4>(uniformAddress(src.bufferIndex, i, a)); 1006 1007 c.x = c.x.xxxx; 1008 c.y = c.y.yyyy; 1009 c.z = c.z.zzzz; 1010 c.w = c.w.wwww; 1011 } 1012 1013 return c; 1014 } 1015 relativeAddress(const Shader::Parameter & var,int bufferIndex)1016 Int PixelProgram::relativeAddress(const Shader::Parameter &var, int bufferIndex) 1017 { 1018 ASSERT(var.rel.deterministic); 1019 1020 if(var.rel.type == Shader::PARAMETER_TEMP) 1021 { 1022 return As<Int>(Extract(r[var.rel.index].x, 0)) * var.rel.scale; 1023 } 1024 else if(var.rel.type == Shader::PARAMETER_INPUT) 1025 { 1026 return As<Int>(Extract(v[var.rel.index].x, 0)) * var.rel.scale; 1027 } 1028 else if(var.rel.type == Shader::PARAMETER_OUTPUT) 1029 { 1030 return As<Int>(Extract(oC[var.rel.index].x, 0)) * var.rel.scale; 1031 } 1032 else if(var.rel.type == Shader::PARAMETER_CONST) 1033 { 1034 return *Pointer<Int>(uniformAddress(bufferIndex, var.rel.index)) * var.rel.scale; 1035 } 1036 else if(var.rel.type == Shader::PARAMETER_LOOP) 1037 { 1038 return aL[loopDepth]; 1039 } 1040 else ASSERT(false); 1041 1042 return 0; 1043 } 1044 linearToSRGB(const Float4 & x)1045 Float4 PixelProgram::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2) 1046 { 1047 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x)); 1048 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f); 1049 1050 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f)); 1051 } 1052 M3X2(Vector4f & dst,Vector4f & src0,const Src & src1)1053 void PixelProgram::M3X2(Vector4f &dst, Vector4f &src0, const Src &src1) 1054 { 1055 Vector4f row0 = fetchRegister(src1, 0); 1056 Vector4f row1 = fetchRegister(src1, 1); 1057 1058 dst.x = dot3(src0, row0); 1059 dst.y = dot3(src0, row1); 1060 } 1061 M3X3(Vector4f & dst,Vector4f & src0,const Src & src1)1062 void PixelProgram::M3X3(Vector4f &dst, Vector4f &src0, const Src &src1) 1063 { 1064 Vector4f row0 = fetchRegister(src1, 0); 1065 Vector4f row1 = fetchRegister(src1, 1); 1066 Vector4f row2 = fetchRegister(src1, 2); 1067 1068 dst.x = dot3(src0, row0); 1069 dst.y = dot3(src0, row1); 1070 dst.z = dot3(src0, row2); 1071 } 1072 M3X4(Vector4f & dst,Vector4f & src0,const Src & src1)1073 void PixelProgram::M3X4(Vector4f &dst, Vector4f &src0, const Src &src1) 1074 { 1075 Vector4f row0 = fetchRegister(src1, 0); 1076 Vector4f row1 = fetchRegister(src1, 1); 1077 Vector4f row2 = fetchRegister(src1, 2); 1078 Vector4f row3 = fetchRegister(src1, 3); 1079 1080 dst.x = dot3(src0, row0); 1081 dst.y = dot3(src0, row1); 1082 dst.z = dot3(src0, row2); 1083 dst.w = dot3(src0, row3); 1084 } 1085 M4X3(Vector4f & dst,Vector4f & src0,const Src & src1)1086 void PixelProgram::M4X3(Vector4f &dst, Vector4f &src0, const Src &src1) 1087 { 1088 Vector4f row0 = fetchRegister(src1, 0); 1089 Vector4f row1 = fetchRegister(src1, 1); 1090 Vector4f row2 = fetchRegister(src1, 2); 1091 1092 dst.x = dot4(src0, row0); 1093 dst.y = dot4(src0, row1); 1094 dst.z = dot4(src0, row2); 1095 } 1096 M4X4(Vector4f & dst,Vector4f & src0,const Src & src1)1097 void PixelProgram::M4X4(Vector4f &dst, Vector4f &src0, const Src &src1) 1098 { 1099 Vector4f row0 = fetchRegister(src1, 0); 1100 Vector4f row1 = fetchRegister(src1, 1); 1101 Vector4f row2 = fetchRegister(src1, 2); 1102 Vector4f row3 = fetchRegister(src1, 3); 1103 1104 dst.x = dot4(src0, row0); 1105 dst.y = dot4(src0, row1); 1106 dst.z = dot4(src0, row2); 1107 dst.w = dot4(src0, row3); 1108 } 1109 TEXLD(Vector4f & dst,Vector4f & src0,const Src & src1,bool project,bool bias)1110 void PixelProgram::TEXLD(Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias) 1111 { 1112 if(project) 1113 { 1114 Vector4f proj; 1115 Float4 rw = reciprocal(src0.w); 1116 proj.x = src0.x * rw; 1117 proj.y = src0.y * rw; 1118 proj.z = src0.z * rw; 1119 1120 sampleTexture(dst, src1, proj, src0, src0, src0, Implicit); 1121 } 1122 else 1123 { 1124 sampleTexture(dst, src1, src0, src0, src0, src0, bias ? Bias : Implicit); 1125 } 1126 } 1127 TEXOFFSET(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,bool bias)1128 void PixelProgram::TEXOFFSET(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, bool bias) 1129 { 1130 sampleTexture(dst, src1, src0, src0, src0, src2, {bias ? Bias : Implicit, Offset}); 1131 } 1132 TEXLDL(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset,bool bias)1133 void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &offset, bool bias) 1134 { 1135 sampleTexture(dst, src1, src0, src0, src0, offset, {Lod, Offset}); 1136 } 1137 TEXELFETCH(Vector4f & dst,Vector4f & src0,const Src & src1)1138 void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1) 1139 { 1140 sampleTexture(dst, src1, src0, src0, src0, src0, Fetch); 1141 } 1142 TEXELFETCH(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & offset)1143 void PixelProgram::TEXELFETCH(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &offset) 1144 { 1145 sampleTexture(dst, src1, src0, src0, src0, offset, {Fetch, Offset}); 1146 } 1147 TEXGRAD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,Vector4f & src3)1148 void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3) 1149 { 1150 sampleTexture(dst, src1, src0, src2, src3, src0, Grad); 1151 } 1152 TEXGRAD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,Vector4f & src3,Vector4f & offset)1153 void PixelProgram::TEXGRAD(Vector4f &dst, Vector4f &src0, const Src& src1, Vector4f &src2, Vector4f &src3, Vector4f &offset) 1154 { 1155 sampleTexture(dst, src1, src0, src2, src3, offset, {Grad, Offset}); 1156 } 1157 TEXLDD(Vector4f & dst,Vector4f & src0,const Src & src1,Vector4f & src2,Vector4f & src3)1158 void PixelProgram::TEXLDD(Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3) 1159 { 1160 sampleTexture(dst, src1, src0, src2, src3, src0, Grad); 1161 } 1162 TEXLDL(Vector4f & dst,Vector4f & src0,const Src & src1)1163 void PixelProgram::TEXLDL(Vector4f &dst, Vector4f &src0, const Src &src1) 1164 { 1165 sampleTexture(dst, src1, src0, src0, src0, src0, Lod); 1166 } 1167 TEXSIZE(Vector4f & dst,Float4 & lod,const Src & src1)1168 void PixelProgram::TEXSIZE(Vector4f &dst, Float4 &lod, const Src &src1) 1169 { 1170 Pointer<Byte> texture = data + OFFSET(DrawData, mipmap) + src1.index * sizeof(Texture); 1171 sampler[src1.index]->textureSize(texture, dst, lod); 1172 } 1173 TEXKILL(Int cMask[4],Vector4f & src,unsigned char mask)1174 void PixelProgram::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask) 1175 { 1176 Int kill = -1; 1177 1178 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f))); 1179 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f))); 1180 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f))); 1181 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f))); 1182 1183 // FIXME: Dynamic branching affects TEXKILL? 1184 // if(shader->containsDynamicBranching()) 1185 // { 1186 // kill = ~SignMask(enableMask()); 1187 // } 1188 1189 for(unsigned int q = 0; q < state.multiSample; q++) 1190 { 1191 cMask[q] &= kill; 1192 } 1193 1194 // FIXME: Branch to end of shader if all killed? 1195 } 1196 DISCARD(Int cMask[4],const Shader::Instruction * instruction)1197 void PixelProgram::DISCARD(Int cMask[4], const Shader::Instruction *instruction) 1198 { 1199 Int kill = 0; 1200 1201 if(shader->containsDynamicBranching()) 1202 { 1203 kill = ~SignMask(enableMask(instruction)); 1204 } 1205 1206 for(unsigned int q = 0; q < state.multiSample; q++) 1207 { 1208 cMask[q] &= kill; 1209 } 1210 1211 // FIXME: Branch to end of shader if all killed? 1212 } 1213 DFDX(Vector4f & dst,Vector4f & src)1214 void PixelProgram::DFDX(Vector4f &dst, Vector4f &src) 1215 { 1216 dst.x = src.x.yyww - src.x.xxzz; 1217 dst.y = src.y.yyww - src.y.xxzz; 1218 dst.z = src.z.yyww - src.z.xxzz; 1219 dst.w = src.w.yyww - src.w.xxzz; 1220 } 1221 DFDY(Vector4f & dst,Vector4f & src)1222 void PixelProgram::DFDY(Vector4f &dst, Vector4f &src) 1223 { 1224 dst.x = src.x.zwzw - src.x.xyxy; 1225 dst.y = src.y.zwzw - src.y.xyxy; 1226 dst.z = src.z.zwzw - src.z.xyxy; 1227 dst.w = src.w.zwzw - src.w.xyxy; 1228 } 1229 FWIDTH(Vector4f & dst,Vector4f & src)1230 void PixelProgram::FWIDTH(Vector4f &dst, Vector4f &src) 1231 { 1232 // abs(dFdx(src)) + abs(dFdy(src)); 1233 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy); 1234 dst.y = Abs(src.y.yyww - src.y.xxzz) + Abs(src.y.zwzw - src.y.xyxy); 1235 dst.z = Abs(src.z.yyww - src.z.xxzz) + Abs(src.z.zwzw - src.z.xyxy); 1236 dst.w = Abs(src.w.yyww - src.w.xxzz) + Abs(src.w.zwzw - src.w.xyxy); 1237 } 1238 BREAK()1239 void PixelProgram::BREAK() 1240 { 1241 BasicBlock *deadBlock = Nucleus::createBasicBlock(); 1242 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; 1243 1244 if(breakDepth == 0) 1245 { 1246 enableIndex = enableIndex - breakDepth; 1247 Nucleus::createBr(endBlock); 1248 } 1249 else 1250 { 1251 enableBreak = enableBreak & ~enableStack[enableIndex]; 1252 Bool allBreak = SignMask(enableBreak) == 0x0; 1253 1254 enableIndex = enableIndex - breakDepth; 1255 branch(allBreak, endBlock, deadBlock); 1256 } 1257 1258 Nucleus::setInsertBlock(deadBlock); 1259 enableIndex = enableIndex + breakDepth; 1260 } 1261 BREAKC(Vector4f & src0,Vector4f & src1,Control control)1262 void PixelProgram::BREAKC(Vector4f &src0, Vector4f &src1, Control control) 1263 { 1264 Int4 condition; 1265 1266 switch(control) 1267 { 1268 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 1269 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 1270 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 1271 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 1272 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 1273 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1274 default: 1275 ASSERT(false); 1276 } 1277 1278 BREAK(condition); 1279 } 1280 BREAKP(const Src & predicateRegister)1281 void PixelProgram::BREAKP(const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC 1282 { 1283 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1284 1285 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1286 { 1287 condition = ~condition; 1288 } 1289 1290 BREAK(condition); 1291 } 1292 BREAK(Int4 & condition)1293 void PixelProgram::BREAK(Int4 &condition) 1294 { 1295 condition &= enableStack[enableIndex]; 1296 1297 BasicBlock *continueBlock = Nucleus::createBasicBlock(); 1298 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1]; 1299 1300 enableBreak = enableBreak & ~condition; 1301 Bool allBreak = SignMask(enableBreak) == 0x0; 1302 1303 enableIndex = enableIndex - breakDepth; 1304 branch(allBreak, endBlock, continueBlock); 1305 1306 Nucleus::setInsertBlock(continueBlock); 1307 enableIndex = enableIndex + breakDepth; 1308 } 1309 CONTINUE()1310 void PixelProgram::CONTINUE() 1311 { 1312 enableContinue = enableContinue & ~enableStack[enableIndex]; 1313 } 1314 TEST()1315 void PixelProgram::TEST() 1316 { 1317 whileTest = true; 1318 } 1319 CALL(int labelIndex,int callSiteIndex)1320 void PixelProgram::CALL(int labelIndex, int callSiteIndex) 1321 { 1322 if(!labelBlock[labelIndex]) 1323 { 1324 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1325 } 1326 1327 if(callRetBlock[labelIndex].size() > 1) 1328 { 1329 callStack[stackIndex++] = UInt(callSiteIndex); 1330 } 1331 1332 Int4 restoreLeave = enableLeave; 1333 1334 Nucleus::createBr(labelBlock[labelIndex]); 1335 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1336 1337 enableLeave = restoreLeave; 1338 } 1339 CALLNZ(int labelIndex,int callSiteIndex,const Src & src)1340 void PixelProgram::CALLNZ(int labelIndex, int callSiteIndex, const Src &src) 1341 { 1342 if(src.type == Shader::PARAMETER_CONSTBOOL) 1343 { 1344 CALLNZb(labelIndex, callSiteIndex, src); 1345 } 1346 else if(src.type == Shader::PARAMETER_PREDICATE) 1347 { 1348 CALLNZp(labelIndex, callSiteIndex, src); 1349 } 1350 else ASSERT(false); 1351 } 1352 CALLNZb(int labelIndex,int callSiteIndex,const Src & boolRegister)1353 void PixelProgram::CALLNZb(int labelIndex, int callSiteIndex, const Src &boolRegister) 1354 { 1355 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME 1356 1357 if(boolRegister.modifier == Shader::MODIFIER_NOT) 1358 { 1359 condition = !condition; 1360 } 1361 1362 if(!labelBlock[labelIndex]) 1363 { 1364 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1365 } 1366 1367 if(callRetBlock[labelIndex].size() > 1) 1368 { 1369 callStack[stackIndex++] = UInt(callSiteIndex); 1370 } 1371 1372 Int4 restoreLeave = enableLeave; 1373 1374 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); 1375 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1376 1377 enableLeave = restoreLeave; 1378 } 1379 CALLNZp(int labelIndex,int callSiteIndex,const Src & predicateRegister)1380 void PixelProgram::CALLNZp(int labelIndex, int callSiteIndex, const Src &predicateRegister) 1381 { 1382 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1383 1384 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1385 { 1386 condition = ~condition; 1387 } 1388 1389 condition &= enableStack[enableIndex]; 1390 1391 if(!labelBlock[labelIndex]) 1392 { 1393 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1394 } 1395 1396 if(callRetBlock[labelIndex].size() > 1) 1397 { 1398 callStack[stackIndex++] = UInt(callSiteIndex); 1399 } 1400 1401 enableIndex++; 1402 enableStack[enableIndex] = condition; 1403 Int4 restoreLeave = enableLeave; 1404 1405 Bool notAllFalse = SignMask(condition) != 0; 1406 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]); 1407 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]); 1408 1409 enableIndex--; 1410 enableLeave = restoreLeave; 1411 } 1412 ELSE()1413 void PixelProgram::ELSE() 1414 { 1415 ifDepth--; 1416 1417 BasicBlock *falseBlock = ifFalseBlock[ifDepth]; 1418 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1419 1420 if(isConditionalIf[ifDepth]) 1421 { 1422 Int4 condition = ~enableStack[enableIndex] & enableStack[enableIndex - 1]; 1423 Bool notAllFalse = SignMask(condition) != 0; 1424 1425 branch(notAllFalse, falseBlock, endBlock); 1426 1427 enableStack[enableIndex] = ~enableStack[enableIndex] & enableStack[enableIndex - 1]; 1428 } 1429 else 1430 { 1431 Nucleus::createBr(endBlock); 1432 Nucleus::setInsertBlock(falseBlock); 1433 } 1434 1435 ifFalseBlock[ifDepth] = endBlock; 1436 1437 ifDepth++; 1438 } 1439 ENDIF()1440 void PixelProgram::ENDIF() 1441 { 1442 ifDepth--; 1443 1444 BasicBlock *endBlock = ifFalseBlock[ifDepth]; 1445 1446 Nucleus::createBr(endBlock); 1447 Nucleus::setInsertBlock(endBlock); 1448 1449 if(isConditionalIf[ifDepth]) 1450 { 1451 breakDepth--; 1452 enableIndex--; 1453 } 1454 } 1455 ENDLOOP()1456 void PixelProgram::ENDLOOP() 1457 { 1458 loopRepDepth--; 1459 1460 aL[loopDepth] = aL[loopDepth] + increment[loopDepth]; // FIXME: += 1461 1462 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1463 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1464 1465 Nucleus::createBr(testBlock); 1466 Nucleus::setInsertBlock(endBlock); 1467 1468 loopDepth--; 1469 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1470 } 1471 ENDREP()1472 void PixelProgram::ENDREP() 1473 { 1474 loopRepDepth--; 1475 1476 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1477 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1478 1479 Nucleus::createBr(testBlock); 1480 Nucleus::setInsertBlock(endBlock); 1481 1482 loopDepth--; 1483 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1484 } 1485 ENDWHILE()1486 void PixelProgram::ENDWHILE() 1487 { 1488 loopRepDepth--; 1489 1490 BasicBlock *testBlock = loopRepTestBlock[loopRepDepth]; 1491 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1492 1493 Nucleus::createBr(testBlock); 1494 Nucleus::setInsertBlock(endBlock); 1495 1496 enableIndex--; 1497 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1498 whileTest = false; 1499 } 1500 ENDSWITCH()1501 void PixelProgram::ENDSWITCH() 1502 { 1503 loopRepDepth--; 1504 1505 BasicBlock *endBlock = loopRepEndBlock[loopRepDepth]; 1506 1507 Nucleus::createBr(loopRepEndBlock[loopRepDepth]); 1508 Nucleus::setInsertBlock(endBlock); 1509 1510 enableIndex--; 1511 enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); 1512 } 1513 IF(const Src & src)1514 void PixelProgram::IF(const Src &src) 1515 { 1516 if(src.type == Shader::PARAMETER_CONSTBOOL) 1517 { 1518 IFb(src); 1519 } 1520 else if(src.type == Shader::PARAMETER_PREDICATE) 1521 { 1522 IFp(src); 1523 } 1524 else 1525 { 1526 Int4 condition = As<Int4>(fetchRegister(src).x); 1527 IF(condition); 1528 } 1529 } 1530 IFb(const Src & boolRegister)1531 void PixelProgram::IFb(const Src &boolRegister) 1532 { 1533 ASSERT(ifDepth < 24 + 4); 1534 1535 Bool condition = (*Pointer<Byte>(data + OFFSET(DrawData, ps.b[boolRegister.index])) != Byte(0)); // FIXME 1536 1537 if(boolRegister.modifier == Shader::MODIFIER_NOT) 1538 { 1539 condition = !condition; 1540 } 1541 1542 BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1543 BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1544 1545 branch(condition, trueBlock, falseBlock); 1546 1547 isConditionalIf[ifDepth] = false; 1548 ifFalseBlock[ifDepth] = falseBlock; 1549 1550 ifDepth++; 1551 } 1552 IFp(const Src & predicateRegister)1553 void PixelProgram::IFp(const Src &predicateRegister) 1554 { 1555 Int4 condition = As<Int4>(p0[predicateRegister.swizzle & 0x3]); 1556 1557 if(predicateRegister.modifier == Shader::MODIFIER_NOT) 1558 { 1559 condition = ~condition; 1560 } 1561 1562 IF(condition); 1563 } 1564 IFC(Vector4f & src0,Vector4f & src1,Control control)1565 void PixelProgram::IFC(Vector4f &src0, Vector4f &src1, Control control) 1566 { 1567 Int4 condition; 1568 1569 switch(control) 1570 { 1571 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break; 1572 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break; 1573 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break; 1574 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break; 1575 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break; 1576 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break; 1577 default: 1578 ASSERT(false); 1579 } 1580 1581 IF(condition); 1582 } 1583 IF(Int4 & condition)1584 void PixelProgram::IF(Int4 &condition) 1585 { 1586 condition &= enableStack[enableIndex]; 1587 1588 enableIndex++; 1589 enableStack[enableIndex] = condition; 1590 1591 BasicBlock *trueBlock = Nucleus::createBasicBlock(); 1592 BasicBlock *falseBlock = Nucleus::createBasicBlock(); 1593 1594 Bool notAllFalse = SignMask(condition) != 0; 1595 1596 branch(notAllFalse, trueBlock, falseBlock); 1597 1598 isConditionalIf[ifDepth] = true; 1599 ifFalseBlock[ifDepth] = falseBlock; 1600 1601 ifDepth++; 1602 breakDepth++; 1603 } 1604 LABEL(int labelIndex)1605 void PixelProgram::LABEL(int labelIndex) 1606 { 1607 if(!labelBlock[labelIndex]) 1608 { 1609 labelBlock[labelIndex] = Nucleus::createBasicBlock(); 1610 } 1611 1612 Nucleus::setInsertBlock(labelBlock[labelIndex]); 1613 currentLabel = labelIndex; 1614 } 1615 LOOP(const Src & integerRegister)1616 void PixelProgram::LOOP(const Src &integerRegister) 1617 { 1618 loopDepth++; 1619 1620 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); 1621 aL[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][1])); 1622 increment[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][2])); 1623 1624 // If(increment[loopDepth] == 0) 1625 // { 1626 // increment[loopDepth] = 1; 1627 // } 1628 1629 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1630 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1631 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1632 1633 loopRepTestBlock[loopRepDepth] = testBlock; 1634 loopRepEndBlock[loopRepDepth] = endBlock; 1635 1636 // FIXME: jump(testBlock) 1637 Nucleus::createBr(testBlock); 1638 Nucleus::setInsertBlock(testBlock); 1639 1640 branch(iteration[loopDepth] > 0, loopBlock, endBlock); 1641 Nucleus::setInsertBlock(loopBlock); 1642 1643 iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- 1644 1645 loopRepDepth++; 1646 breakDepth = 0; 1647 } 1648 REP(const Src & integerRegister)1649 void PixelProgram::REP(const Src &integerRegister) 1650 { 1651 loopDepth++; 1652 1653 iteration[loopDepth] = *Pointer<Int>(data + OFFSET(DrawData, ps.i[integerRegister.index][0])); 1654 aL[loopDepth] = aL[loopDepth - 1]; 1655 1656 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1657 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1658 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1659 1660 loopRepTestBlock[loopRepDepth] = testBlock; 1661 loopRepEndBlock[loopRepDepth] = endBlock; 1662 1663 // FIXME: jump(testBlock) 1664 Nucleus::createBr(testBlock); 1665 Nucleus::setInsertBlock(testBlock); 1666 1667 branch(iteration[loopDepth] > 0, loopBlock, endBlock); 1668 Nucleus::setInsertBlock(loopBlock); 1669 1670 iteration[loopDepth] = iteration[loopDepth] - 1; // FIXME: -- 1671 1672 loopRepDepth++; 1673 breakDepth = 0; 1674 } 1675 WHILE(const Src & temporaryRegister)1676 void PixelProgram::WHILE(const Src &temporaryRegister) 1677 { 1678 enableIndex++; 1679 1680 BasicBlock *loopBlock = Nucleus::createBasicBlock(); 1681 BasicBlock *testBlock = Nucleus::createBasicBlock(); 1682 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1683 1684 loopRepTestBlock[loopRepDepth] = testBlock; 1685 loopRepEndBlock[loopRepDepth] = endBlock; 1686 1687 Int4 restoreBreak = enableBreak; 1688 Int4 restoreContinue = enableContinue; 1689 1690 // FIXME: jump(testBlock) 1691 Nucleus::createBr(testBlock); 1692 Nucleus::setInsertBlock(testBlock); 1693 enableContinue = restoreContinue; 1694 1695 const Vector4f &src = fetchRegister(temporaryRegister); 1696 Int4 condition = As<Int4>(src.x); 1697 condition &= enableStack[enableIndex - 1]; 1698 if(shader->containsLeaveInstruction()) condition &= enableLeave; 1699 enableStack[enableIndex] = condition; 1700 1701 Bool notAllFalse = SignMask(condition) != 0; 1702 branch(notAllFalse, loopBlock, endBlock); 1703 1704 Nucleus::setInsertBlock(endBlock); 1705 enableBreak = restoreBreak; 1706 1707 Nucleus::setInsertBlock(loopBlock); 1708 1709 loopRepDepth++; 1710 breakDepth = 0; 1711 } 1712 SWITCH()1713 void PixelProgram::SWITCH() 1714 { 1715 enableIndex++; 1716 enableStack[enableIndex] = Int4(0xFFFFFFFF); 1717 1718 BasicBlock *endBlock = Nucleus::createBasicBlock(); 1719 1720 loopRepTestBlock[loopRepDepth] = nullptr; 1721 loopRepEndBlock[loopRepDepth] = endBlock; 1722 1723 loopRepDepth++; 1724 breakDepth = 0; 1725 } 1726 RET()1727 void PixelProgram::RET() 1728 { 1729 if(currentLabel == -1) 1730 { 1731 returnBlock = Nucleus::createBasicBlock(); 1732 Nucleus::createBr(returnBlock); 1733 } 1734 else 1735 { 1736 BasicBlock *unreachableBlock = Nucleus::createBasicBlock(); 1737 1738 if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack 1739 { 1740 // FIXME: Encapsulate 1741 UInt index = callStack[--stackIndex]; 1742 1743 Value *value = index.loadValue(); 1744 SwitchCases *switchCases = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size()); 1745 1746 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++) 1747 { 1748 Nucleus::addSwitchCase(switchCases, i, callRetBlock[currentLabel][i]); 1749 } 1750 } 1751 else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination 1752 { 1753 Nucleus::createBr(callRetBlock[currentLabel][0]); 1754 } 1755 else // Function isn't called 1756 { 1757 Nucleus::createBr(unreachableBlock); 1758 } 1759 1760 Nucleus::setInsertBlock(unreachableBlock); 1761 Nucleus::createUnreachable(); 1762 } 1763 } 1764 LEAVE()1765 void PixelProgram::LEAVE() 1766 { 1767 enableLeave = enableLeave & ~enableStack[enableIndex]; 1768 1769 // FIXME: Return from function if all instances left 1770 // FIXME: Use enableLeave in other control-flow constructs 1771 } 1772 } 1773