1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file binner.h
24 *
25 * @brief Declaration for the macrotile binner
26 *
27 ******************************************************************************/
28 #include "state.h"
29 #include "conservativeRast.h"
30 #include "utils.h"
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Offsets added to post-viewport vertex positions based on
33 /// raster state.
34 ///
35 /// Can't use templated variable because we must stick with C++11 features.
36 /// Template variables were introduced with C++14
37 template <typename SIMD_T>
38 struct SwrPixelOffsets
39 {
40 public:
GetOffsetSwrPixelOffsets41 INLINE static Float<SIMD_T> GetOffset(uint32_t loc)
42 {
43 SWR_ASSERT(loc <= 1);
44
45 return SIMD_T::set1_ps(loc ? 0.5f : 0.0f);
46 }
47 };
48
49 //////////////////////////////////////////////////////////////////////////
50 /// @brief Convert the X,Y coords of a triangle to the requested Fixed
51 /// Point precision from FP32.
52 template <typename SIMD_T, typename PT = FixedPointTraits<Fixed_16_8>>
fpToFixedPointVertical(const Float<SIMD_T> & vIn)53 INLINE Integer<SIMD_T> fpToFixedPointVertical(const Float<SIMD_T>& vIn)
54 {
55 return SIMD_T::cvtps_epi32(SIMD_T::mul_ps(vIn, SIMD_T::set1_ps(PT::ScaleT::value)));
56 }
57
58 //////////////////////////////////////////////////////////////////////////
59 /// @brief Helper function to set the X,Y coords of a triangle to the
60 /// requested Fixed Point precision from FP32.
61 /// @param tri: simdvector[3] of FP triangle verts
62 /// @param vXi: fixed point X coords of tri verts
63 /// @param vYi: fixed point Y coords of tri verts
64 template <typename SIMD_T>
65 INLINE static void
FPToFixedPoint(const Vec4<SIMD_T> * const tri,Integer<SIMD_T> (& vXi)[3],Integer<SIMD_T> (& vYi)[3])66 FPToFixedPoint(const Vec4<SIMD_T>* const tri, Integer<SIMD_T> (&vXi)[3], Integer<SIMD_T> (&vYi)[3])
67 {
68 vXi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].x);
69 vYi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].y);
70 vXi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].x);
71 vYi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].y);
72 vXi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].x);
73 vYi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].y);
74 }
75
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Calculate bounding box for current triangle
78 /// @tparam CT: ConservativeRastFETraits type
79 /// @param vX: fixed point X position for triangle verts
80 /// @param vY: fixed point Y position for triangle verts
81 /// @param bbox: fixed point bbox
82 /// *Note*: expects vX, vY to be in the correct precision for the type
83 /// of rasterization. This avoids unnecessary FP->fixed conversions.
84 template <typename SIMD_T, typename CT>
calcBoundingBoxIntVertical(const Integer<SIMD_T> (& vX)[3],const Integer<SIMD_T> (& vY)[3],SIMDBBOX_T<SIMD_T> & bbox)85 INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T> (&vX)[3],
86 const Integer<SIMD_T> (&vY)[3],
87 SIMDBBOX_T<SIMD_T>& bbox)
88 {
89 Integer<SIMD_T> vMinX = vX[0];
90
91 vMinX = SIMD_T::min_epi32(vMinX, vX[1]);
92 vMinX = SIMD_T::min_epi32(vMinX, vX[2]);
93
94 Integer<SIMD_T> vMaxX = vX[0];
95
96 vMaxX = SIMD_T::max_epi32(vMaxX, vX[1]);
97 vMaxX = SIMD_T::max_epi32(vMaxX, vX[2]);
98
99 Integer<SIMD_T> vMinY = vY[0];
100
101 vMinY = SIMD_T::min_epi32(vMinY, vY[1]);
102 vMinY = SIMD_T::min_epi32(vMinY, vY[2]);
103
104 Integer<SIMD_T> vMaxY = vY[0];
105
106 vMaxY = SIMD_T::max_epi32(vMaxY, vY[1]);
107 vMaxY = SIMD_T::max_epi32(vMaxY, vY[2]);
108
109 if (CT::BoundingBoxOffsetT::value != 0)
110 {
111 /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative
112 /// rasterization expand bbox by 1/256; coverage will be correctly handled in the
113 /// rasterizer.
114
115 const Integer<SIMD_T> value = SIMD_T::set1_epi32(CT::BoundingBoxOffsetT::value);
116
117 vMinX = SIMD_T::sub_epi32(vMinX, value);
118 vMaxX = SIMD_T::add_epi32(vMaxX, value);
119 vMinY = SIMD_T::sub_epi32(vMinY, value);
120 vMaxY = SIMD_T::add_epi32(vMaxY, value);
121 }
122
123 bbox.xmin = vMinX;
124 bbox.xmax = vMaxX;
125 bbox.ymin = vMinY;
126 bbox.ymax = vMaxY;
127 }
128
129 //////////////////////////////////////////////////////////////////////////
130 /// @brief Gather scissor rect data based on per-prim viewport indices.
131 /// @param pScissorsInFixedPoint - array of scissor rects in 16.8 fixed point.
132 /// @param pViewportIndex - array of per-primitive vewport indexes.
133 /// @param scisXmin - output vector of per-prmitive scissor rect Xmin data.
134 /// @param scisYmin - output vector of per-prmitive scissor rect Ymin data.
135 /// @param scisXmax - output vector of per-prmitive scissor rect Xmax data.
136 /// @param scisYmax - output vector of per-prmitive scissor rect Ymax data.
137 //
138 /// @todo: Look at speeding this up -- weigh against corresponding costs in rasterizer.
GatherScissors(const SWR_RECT * pScissorsInFixedPoint,const uint32_t * pViewportIndex,simdscalari & scisXmin,simdscalari & scisYmin,simdscalari & scisXmax,simdscalari & scisYmax)139 static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
140 const uint32_t* pViewportIndex,
141 simdscalari& scisXmin,
142 simdscalari& scisYmin,
143 simdscalari& scisXmax,
144 simdscalari& scisYmax)
145 {
146 scisXmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmin,
147 pScissorsInFixedPoint[pViewportIndex[6]].xmin,
148 pScissorsInFixedPoint[pViewportIndex[5]].xmin,
149 pScissorsInFixedPoint[pViewportIndex[4]].xmin,
150 pScissorsInFixedPoint[pViewportIndex[3]].xmin,
151 pScissorsInFixedPoint[pViewportIndex[2]].xmin,
152 pScissorsInFixedPoint[pViewportIndex[1]].xmin,
153 pScissorsInFixedPoint[pViewportIndex[0]].xmin);
154 scisYmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymin,
155 pScissorsInFixedPoint[pViewportIndex[6]].ymin,
156 pScissorsInFixedPoint[pViewportIndex[5]].ymin,
157 pScissorsInFixedPoint[pViewportIndex[4]].ymin,
158 pScissorsInFixedPoint[pViewportIndex[3]].ymin,
159 pScissorsInFixedPoint[pViewportIndex[2]].ymin,
160 pScissorsInFixedPoint[pViewportIndex[1]].ymin,
161 pScissorsInFixedPoint[pViewportIndex[0]].ymin);
162 scisXmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmax,
163 pScissorsInFixedPoint[pViewportIndex[6]].xmax,
164 pScissorsInFixedPoint[pViewportIndex[5]].xmax,
165 pScissorsInFixedPoint[pViewportIndex[4]].xmax,
166 pScissorsInFixedPoint[pViewportIndex[3]].xmax,
167 pScissorsInFixedPoint[pViewportIndex[2]].xmax,
168 pScissorsInFixedPoint[pViewportIndex[1]].xmax,
169 pScissorsInFixedPoint[pViewportIndex[0]].xmax);
170 scisYmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymax,
171 pScissorsInFixedPoint[pViewportIndex[6]].ymax,
172 pScissorsInFixedPoint[pViewportIndex[5]].ymax,
173 pScissorsInFixedPoint[pViewportIndex[4]].ymax,
174 pScissorsInFixedPoint[pViewportIndex[3]].ymax,
175 pScissorsInFixedPoint[pViewportIndex[2]].ymax,
176 pScissorsInFixedPoint[pViewportIndex[1]].ymax,
177 pScissorsInFixedPoint[pViewportIndex[0]].ymax);
178 }
179
GatherScissors(const SWR_RECT * pScissorsInFixedPoint,const uint32_t * pViewportIndex,simd16scalari & scisXmin,simd16scalari & scisYmin,simd16scalari & scisXmax,simd16scalari & scisYmax)180 static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
181 const uint32_t* pViewportIndex,
182 simd16scalari& scisXmin,
183 simd16scalari& scisYmin,
184 simd16scalari& scisXmax,
185 simd16scalari& scisYmax)
186 {
187 scisXmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmin,
188 pScissorsInFixedPoint[pViewportIndex[14]].xmin,
189 pScissorsInFixedPoint[pViewportIndex[13]].xmin,
190 pScissorsInFixedPoint[pViewportIndex[12]].xmin,
191 pScissorsInFixedPoint[pViewportIndex[11]].xmin,
192 pScissorsInFixedPoint[pViewportIndex[10]].xmin,
193 pScissorsInFixedPoint[pViewportIndex[9]].xmin,
194 pScissorsInFixedPoint[pViewportIndex[8]].xmin,
195 pScissorsInFixedPoint[pViewportIndex[7]].xmin,
196 pScissorsInFixedPoint[pViewportIndex[6]].xmin,
197 pScissorsInFixedPoint[pViewportIndex[5]].xmin,
198 pScissorsInFixedPoint[pViewportIndex[4]].xmin,
199 pScissorsInFixedPoint[pViewportIndex[3]].xmin,
200 pScissorsInFixedPoint[pViewportIndex[2]].xmin,
201 pScissorsInFixedPoint[pViewportIndex[1]].xmin,
202 pScissorsInFixedPoint[pViewportIndex[0]].xmin);
203
204 scisYmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymin,
205 pScissorsInFixedPoint[pViewportIndex[14]].ymin,
206 pScissorsInFixedPoint[pViewportIndex[13]].ymin,
207 pScissorsInFixedPoint[pViewportIndex[12]].ymin,
208 pScissorsInFixedPoint[pViewportIndex[11]].ymin,
209 pScissorsInFixedPoint[pViewportIndex[10]].ymin,
210 pScissorsInFixedPoint[pViewportIndex[9]].ymin,
211 pScissorsInFixedPoint[pViewportIndex[8]].ymin,
212 pScissorsInFixedPoint[pViewportIndex[7]].ymin,
213 pScissorsInFixedPoint[pViewportIndex[6]].ymin,
214 pScissorsInFixedPoint[pViewportIndex[5]].ymin,
215 pScissorsInFixedPoint[pViewportIndex[4]].ymin,
216 pScissorsInFixedPoint[pViewportIndex[3]].ymin,
217 pScissorsInFixedPoint[pViewportIndex[2]].ymin,
218 pScissorsInFixedPoint[pViewportIndex[1]].ymin,
219 pScissorsInFixedPoint[pViewportIndex[0]].ymin);
220
221 scisXmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmax,
222 pScissorsInFixedPoint[pViewportIndex[14]].xmax,
223 pScissorsInFixedPoint[pViewportIndex[13]].xmax,
224 pScissorsInFixedPoint[pViewportIndex[12]].xmax,
225 pScissorsInFixedPoint[pViewportIndex[11]].xmax,
226 pScissorsInFixedPoint[pViewportIndex[10]].xmax,
227 pScissorsInFixedPoint[pViewportIndex[9]].xmax,
228 pScissorsInFixedPoint[pViewportIndex[8]].xmax,
229 pScissorsInFixedPoint[pViewportIndex[7]].xmax,
230 pScissorsInFixedPoint[pViewportIndex[6]].xmax,
231 pScissorsInFixedPoint[pViewportIndex[5]].xmax,
232 pScissorsInFixedPoint[pViewportIndex[4]].xmax,
233 pScissorsInFixedPoint[pViewportIndex[3]].xmax,
234 pScissorsInFixedPoint[pViewportIndex[2]].xmax,
235 pScissorsInFixedPoint[pViewportIndex[1]].xmax,
236 pScissorsInFixedPoint[pViewportIndex[0]].xmax);
237
238 scisYmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymax,
239 pScissorsInFixedPoint[pViewportIndex[14]].ymax,
240 pScissorsInFixedPoint[pViewportIndex[13]].ymax,
241 pScissorsInFixedPoint[pViewportIndex[12]].ymax,
242 pScissorsInFixedPoint[pViewportIndex[11]].ymax,
243 pScissorsInFixedPoint[pViewportIndex[10]].ymax,
244 pScissorsInFixedPoint[pViewportIndex[9]].ymax,
245 pScissorsInFixedPoint[pViewportIndex[8]].ymax,
246 pScissorsInFixedPoint[pViewportIndex[7]].ymax,
247 pScissorsInFixedPoint[pViewportIndex[6]].ymax,
248 pScissorsInFixedPoint[pViewportIndex[5]].ymax,
249 pScissorsInFixedPoint[pViewportIndex[4]].ymax,
250 pScissorsInFixedPoint[pViewportIndex[3]].ymax,
251 pScissorsInFixedPoint[pViewportIndex[2]].ymax,
252 pScissorsInFixedPoint[pViewportIndex[1]].ymax,
253 pScissorsInFixedPoint[pViewportIndex[0]].ymax);
254 }