1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "QuadRasterizer.hpp" 16 17 #include "Primitive.hpp" 18 #include "Renderer.hpp" 19 #include "Shader/Constants.hpp" 20 #include "Common/Math.hpp" 21 #include "Common/Debug.hpp" 22 23 namespace sw 24 { 25 extern bool veryEarlyDepthTest; 26 extern bool complementaryDepthBuffer; 27 extern bool fullPixelPositionRegister; 28 29 extern int clusterCount; 30 QuadRasterizer(const PixelProcessor::State & state,const PixelShader * pixelShader)31 QuadRasterizer::QuadRasterizer(const PixelProcessor::State &state, const PixelShader *pixelShader) : state(state), shader(pixelShader) 32 { 33 } 34 ~QuadRasterizer()35 QuadRasterizer::~QuadRasterizer() 36 { 37 } 38 generate()39 void QuadRasterizer::generate() 40 { 41 #if PERF_PROFILE 42 for(int i = 0; i < PERF_TIMERS; i++) 43 { 44 cycles[i] = 0; 45 } 46 47 Long pixelTime = Ticks(); 48 #endif 49 50 constants = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,constants)); 51 occlusion = 0; 52 int clusterCount = Renderer::getClusterCount(); 53 54 Do 55 { 56 Int yMin = *Pointer<Int>(primitive + OFFSET(Primitive,yMin)); 57 Int yMax = *Pointer<Int>(primitive + OFFSET(Primitive,yMax)); 58 59 Int cluster2 = cluster + cluster; 60 yMin += clusterCount * 2 - 2 - cluster2; 61 yMin &= -clusterCount * 2; 62 yMin += cluster2; 63 64 If(yMin < yMax) 65 { 66 rasterize(yMin, yMax); 67 } 68 69 primitive += sizeof(Primitive) * state.multiSample; 70 count--; 71 } 72 Until(count == 0) 73 74 if(state.occlusionEnabled) 75 { 76 UInt clusterOcclusion = *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster); 77 clusterOcclusion += occlusion; 78 *Pointer<UInt>(data + OFFSET(DrawData,occlusion) + 4 * cluster) = clusterOcclusion; 79 } 80 81 #if PERF_PROFILE 82 cycles[PERF_PIXEL] = Ticks() - pixelTime; 83 84 for(int i = 0; i < PERF_TIMERS; i++) 85 { 86 *Pointer<Long>(data + OFFSET(DrawData,cycles[i]) + 8 * cluster) += cycles[i]; 87 } 88 #endif 89 90 Return(); 91 } 92 rasterize(Int & yMin,Int & yMax)93 void QuadRasterizer::rasterize(Int &yMin, Int &yMax) 94 { 95 Pointer<Byte> cBuffer[RENDERTARGETS]; 96 Pointer<Byte> zBuffer; 97 Pointer<Byte> sBuffer; 98 99 for(int index = 0; index < RENDERTARGETS; index++) 100 { 101 if(state.colorWriteActive(index)) 102 { 103 cBuffer[index] = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,colorBuffer[index])) + yMin * *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])); 104 } 105 } 106 107 if(state.depthTestActive) 108 { 109 zBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,depthBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 110 } 111 112 if(state.stencilActive) 113 { 114 sBuffer = *Pointer<Pointer<Byte>>(data + OFFSET(DrawData,stencilBuffer)) + yMin * *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)); 115 } 116 117 Int y = yMin; 118 119 Do 120 { 121 Int x0a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); 122 Int x0b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); 123 Int x0 = Min(x0a, x0b); 124 125 for(unsigned int q = 1; q < state.multiSample; q++) 126 { 127 x0a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 0) * sizeof(Primitive::Span))); 128 x0b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->left) + (y + 1) * sizeof(Primitive::Span))); 129 x0 = Min(x0, Min(x0a, x0b)); 130 } 131 132 x0 &= 0xFFFFFFFE; 133 134 Int x1a = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); 135 Int x1b = Int(*Pointer<Short>(primitive + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); 136 Int x1 = Max(x1a, x1b); 137 138 for(unsigned int q = 1; q < state.multiSample; q++) 139 { 140 x1a = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 0) * sizeof(Primitive::Span))); 141 x1b = Int(*Pointer<Short>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline->right) + (y + 1) * sizeof(Primitive::Span))); 142 x1 = Max(x1, Max(x1a, x1b)); 143 } 144 145 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16); 146 147 if(interpolateZ()) 148 { 149 for(unsigned int q = 0; q < state.multiSample; q++) 150 { 151 Float4 y = yyyy; 152 153 if(state.multiSample > 1) 154 { 155 y -= *Pointer<Float4>(constants + OFFSET(Constants,Y) + q * sizeof(float4)); 156 } 157 158 Dz[q] = *Pointer<Float4>(primitive + OFFSET(Primitive,z.C), 16) + y * *Pointer<Float4>(primitive + OFFSET(Primitive,z.B), 16); 159 } 160 } 161 162 if(veryEarlyDepthTest && state.multiSample == 1 && !state.depthOverride) 163 { 164 if(!state.stencilActive && state.depthTestActive && (state.depthCompareMode == DEPTH_LESSEQUAL || state.depthCompareMode == DEPTH_LESS)) // FIXME: Both modes ok? 165 { 166 Float4 xxxx = Float4(Float(x0)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16); 167 168 Pointer<Byte> buffer; 169 Int pitch; 170 171 if(!state.quadLayoutDepthBuffer) 172 { 173 buffer = zBuffer + 4 * x0; 174 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)); 175 } 176 else 177 { 178 buffer = zBuffer + 8 * x0; 179 } 180 181 For(Int x = x0, x < x1, x += 2) 182 { 183 Float4 z = interpolate(xxxx, Dz[0], z, primitive + OFFSET(Primitive,z), false, false, state.depthClamp); 184 185 Float4 zValue; 186 187 if(!state.quadLayoutDepthBuffer) 188 { 189 // FIXME: Properly optimizes? 190 zValue.xy = *Pointer<Float4>(buffer); 191 zValue.zw = *Pointer<Float4>(buffer + pitch - 8); 192 } 193 else 194 { 195 zValue = *Pointer<Float4>(buffer, 16); 196 } 197 198 Int4 zTest; 199 200 if(complementaryDepthBuffer) 201 { 202 zTest = CmpLE(zValue, z); 203 } 204 else 205 { 206 zTest = CmpNLT(zValue, z); 207 } 208 209 Int zMask = SignMask(zTest); 210 211 If(zMask == 0) 212 { 213 x0 += 2; 214 } 215 Else 216 { 217 x = x1; 218 } 219 220 xxxx += Float4(2); 221 222 if(!state.quadLayoutDepthBuffer) 223 { 224 buffer += 8; 225 } 226 else 227 { 228 buffer += 16; 229 } 230 } 231 } 232 } 233 234 If(x0 < x1) 235 { 236 if(interpolateW()) 237 { 238 Dw = *Pointer<Float4>(primitive + OFFSET(Primitive,w.C), 16) + yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,w.B), 16); 239 } 240 241 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++) 242 { 243 for(int component = 0; component < 4; component++) 244 { 245 if(state.interpolant[interpolant].component & (1 << component)) 246 { 247 Dv[interpolant][component] = *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].C), 16); 248 249 if(!(state.interpolant[interpolant].flat & (1 << component))) 250 { 251 Dv[interpolant][component] += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,V[interpolant][component].B), 16); 252 } 253 } 254 } 255 } 256 257 if(state.fog.component) 258 { 259 Df = *Pointer<Float4>(primitive + OFFSET(Primitive,f.C), 16); 260 261 if(!state.fog.flat) 262 { 263 Df += yyyy * *Pointer<Float4>(primitive + OFFSET(Primitive,f.B), 16); 264 } 265 } 266 267 Short4 xLeft[4]; 268 Short4 xRight[4]; 269 270 for(unsigned int q = 0; q < state.multiSample; q++) 271 { 272 xLeft[q] = *Pointer<Short4>(primitive + q * sizeof(Primitive) + OFFSET(Primitive,outline) + y * sizeof(Primitive::Span)); 273 xRight[q] = xLeft[q]; 274 275 xLeft[q] = Swizzle(xLeft[q], 0xA0) - Short4(1, 2, 1, 2); 276 xRight[q] = Swizzle(xRight[q], 0xF5) - Short4(0, 1, 0, 1); 277 } 278 279 For(Int x = x0, x < x1, x += 2) 280 { 281 Short4 xxxx = Short4(x); 282 Int cMask[4]; 283 284 for(unsigned int q = 0; q < state.multiSample; q++) 285 { 286 Short4 mask = CmpGT(xxxx, xLeft[q]) & CmpGT(xRight[q], xxxx); 287 cMask[q] = SignMask(PackSigned(mask, mask)) & 0x0000000F; 288 } 289 290 quad(cBuffer, zBuffer, sBuffer, cMask, x, y); 291 } 292 } 293 294 int clusterCount = Renderer::getClusterCount(); 295 296 for(int index = 0; index < RENDERTARGETS; index++) 297 { 298 if(state.colorWriteActive(index)) 299 { 300 cBuffer[index] += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index])) << (1 + sw::log2(clusterCount)); // FIXME: Precompute 301 } 302 } 303 304 if(state.depthTestActive) 305 { 306 zBuffer += *Pointer<Int>(data + OFFSET(DrawData,depthPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute 307 } 308 309 if(state.stencilActive) 310 { 311 sBuffer += *Pointer<Int>(data + OFFSET(DrawData,stencilPitchB)) << (1 + sw::log2(clusterCount)); // FIXME: Precompute 312 } 313 314 y += 2 * clusterCount; 315 } 316 Until(y >= yMax) 317 } 318 interpolate(Float4 & x,Float4 & D,Float4 & rhw,Pointer<Byte> planeEquation,bool flat,bool perspective,bool clamp)319 Float4 QuadRasterizer::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective, bool clamp) 320 { 321 Float4 interpolant = D; 322 323 if(!flat) 324 { 325 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation, A), 16); 326 327 if(perspective) 328 { 329 interpolant *= rhw; 330 } 331 } 332 333 if(clamp) 334 { 335 interpolant = Min(Max(interpolant, Float4(0.0f)), Float4(1.0f)); 336 } 337 338 return interpolant; 339 } 340 interpolateZ() const341 bool QuadRasterizer::interpolateZ() const 342 { 343 return state.depthTestActive || state.pixelFogActive() || (shader && shader->isVPosDeclared() && fullPixelPositionRegister); 344 } 345 interpolateW() const346 bool QuadRasterizer::interpolateW() const 347 { 348 return state.perspective || (shader && shader->isVPosDeclared() && fullPixelPositionRegister); 349 } 350 } 351