1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef sw_ShaderCore_hpp 16 #define sw_ShaderCore_hpp 17 18 #include "Debug.hpp" 19 #include "Shader.hpp" 20 #include "Reactor/Reactor.hpp" 21 22 namespace sw 23 { 24 class Vector4s 25 { 26 public: 27 Vector4s(); 28 Vector4s(unsigned short x, unsigned short y, unsigned short z, unsigned short w); 29 Vector4s(const Vector4s &rhs); 30 31 Short4 &operator[](int i); 32 Vector4s &operator=(const Vector4s &rhs); 33 34 Short4 x; 35 Short4 y; 36 Short4 z; 37 Short4 w; 38 }; 39 40 class Vector4f 41 { 42 public: 43 Vector4f(); 44 Vector4f(float x, float y, float z, float w); 45 Vector4f(const Vector4f &rhs); 46 47 Float4 &operator[](int i); 48 Vector4f &operator=(const Vector4f &rhs); 49 50 Float4 x; 51 Float4 y; 52 Float4 z; 53 Float4 w; 54 }; 55 56 Float4 exponential2(RValue<Float4> x, bool pp = false); 57 Float4 logarithm2(RValue<Float4> x, bool abs, bool pp = false); 58 Float4 exponential(RValue<Float4> x, bool pp = false); 59 Float4 logarithm(RValue<Float4> x, bool abs, bool pp = false); 60 Float4 power(RValue<Float4> x, RValue<Float4> y, bool pp = false); 61 Float4 reciprocal(RValue<Float4> x, bool pp = false, bool finite = false, bool exactAtPow2 = false); 62 Float4 reciprocalSquareRoot(RValue<Float4> x, bool abs, bool pp = false); 63 Float4 modulo(RValue<Float4> x, RValue<Float4> y); 64 Float4 sine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range 65 Float4 cosine_pi(RValue<Float4> x, bool pp = false); // limited to [-pi, pi] range 66 Float4 sine(RValue<Float4> x, bool pp = false); 67 Float4 cosine(RValue<Float4> x, bool pp = false); 68 Float4 tangent(RValue<Float4> x, bool pp = false); 69 Float4 arccos(RValue<Float4> x, bool pp = false); 70 Float4 arcsin(RValue<Float4> x, bool pp = false); 71 Float4 arctan(RValue<Float4> x, bool pp = false); 72 Float4 arctan(RValue<Float4> y, RValue<Float4> x, bool pp = false); 73 Float4 sineh(RValue<Float4> x, bool pp = false); 74 Float4 cosineh(RValue<Float4> x, bool pp = false); 75 Float4 tangenth(RValue<Float4> x, bool pp = false); 76 Float4 arccosh(RValue<Float4> x, bool pp = false); // Limited to x >= 1 77 Float4 arcsinh(RValue<Float4> x, bool pp = false); 78 Float4 arctanh(RValue<Float4> x, bool pp = false); // Limited to ]-1, 1[ range 79 80 Float4 dot2(const Vector4f &v0, const Vector4f &v1); 81 Float4 dot3(const Vector4f &v0, const Vector4f &v1); 82 Float4 dot4(const Vector4f &v0, const Vector4f &v1); 83 84 void transpose4x4(Short4 &row0, Short4 &row1, Short4 &row2, Short4 &row3); 85 void transpose4x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 86 void transpose4x3(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 87 void transpose4x2(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 88 void transpose4x1(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 89 void transpose2x4(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3); 90 void transpose4xN(Float4 &row0, Float4 &row1, Float4 &row2, Float4 &row3, int N); 91 92 class Register 93 { 94 public: Register(const Reference<Float4> & x,const Reference<Float4> & y,const Reference<Float4> & z,const Reference<Float4> & w)95 Register(const Reference<Float4> &x, const Reference<Float4> &y, const Reference<Float4> &z, const Reference<Float4> &w) : x(x), y(y), z(z), w(w) 96 { 97 } 98 operator [](int i)99 Reference<Float4> &operator[](int i) 100 { 101 switch(i) 102 { 103 default: 104 case 0: return x; 105 case 1: return y; 106 case 2: return z; 107 case 3: return w; 108 } 109 } 110 operator =(const Register & rhs)111 Register &operator=(const Register &rhs) 112 { 113 x = rhs.x; 114 y = rhs.y; 115 z = rhs.z; 116 w = rhs.w; 117 118 return *this; 119 } 120 operator =(const Vector4f & rhs)121 Register &operator=(const Vector4f &rhs) 122 { 123 x = rhs.x; 124 y = rhs.y; 125 z = rhs.z; 126 w = rhs.w; 127 128 return *this; 129 } 130 operator Vector4f()131 operator Vector4f() 132 { 133 Vector4f v; 134 135 v.x = x; 136 v.y = y; 137 v.z = z; 138 v.w = w; 139 140 return v; 141 } 142 143 Reference<Float4> x; 144 Reference<Float4> y; 145 Reference<Float4> z; 146 Reference<Float4> w; 147 }; 148 149 template<int S, bool D = false> 150 class RegisterArray 151 { 152 public: RegisterArray(bool dynamic=D)153 RegisterArray(bool dynamic = D) : dynamic(dynamic) 154 { 155 if(dynamic) 156 { 157 x = new Array<Float4>(S); 158 y = new Array<Float4>(S); 159 z = new Array<Float4>(S); 160 w = new Array<Float4>(S); 161 } 162 else 163 { 164 x = new Array<Float4>[S]; 165 y = new Array<Float4>[S]; 166 z = new Array<Float4>[S]; 167 w = new Array<Float4>[S]; 168 } 169 } 170 ~RegisterArray()171 ~RegisterArray() 172 { 173 if(dynamic) 174 { 175 delete x; 176 delete y; 177 delete z; 178 delete w; 179 } 180 else 181 { 182 delete[] x; 183 delete[] y; 184 delete[] z; 185 delete[] w; 186 } 187 } 188 operator [](int i)189 Register operator[](int i) 190 { 191 if(dynamic) 192 { 193 return Register(x[0][i], y[0][i], z[0][i], w[0][i]); 194 } 195 else 196 { 197 return Register(x[i][0], y[i][0], z[i][0], w[i][0]); 198 } 199 } 200 operator [](RValue<Int> i)201 Register operator[](RValue<Int> i) 202 { 203 ASSERT(dynamic); 204 205 return Register(x[0][i], y[0][i], z[0][i], w[0][i]); 206 } 207 208 private: 209 const bool dynamic; 210 Array<Float4> *x; 211 Array<Float4> *y; 212 Array<Float4> *z; 213 Array<Float4> *w; 214 }; 215 216 class ShaderCore 217 { 218 typedef Shader::Control Control; 219 220 public: 221 void mov(Vector4f &dst, const Vector4f &src, bool integerDestination = false); 222 void neg(Vector4f &dst, const Vector4f &src); 223 void ineg(Vector4f &dst, const Vector4f &src); 224 void f2b(Vector4f &dst, const Vector4f &src); 225 void b2f(Vector4f &dst, const Vector4f &src); 226 void f2i(Vector4f &dst, const Vector4f &src); 227 void i2f(Vector4f &dst, const Vector4f &src); 228 void f2u(Vector4f &dst, const Vector4f &src); 229 void u2f(Vector4f &dst, const Vector4f &src); 230 void i2b(Vector4f &dst, const Vector4f &src); 231 void b2i(Vector4f &dst, const Vector4f &src); 232 void add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 233 void iadd(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 234 void sub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 235 void isub(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 236 void mad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 237 void imad(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 238 void mul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 239 void imul(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 240 void rcpx(Vector4f &dst, const Vector4f &src, bool pp = false); 241 void div(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 242 void idiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 243 void udiv(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 244 void mod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 245 void imod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 246 void umod(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 247 void shl(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 248 void ishr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 249 void ushr(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 250 void rsqx(Vector4f &dst, const Vector4f &src, bool pp = false); 251 void sqrt(Vector4f &dst, const Vector4f &src, bool pp = false); 252 void rsq(Vector4f &dst, const Vector4f &src, bool pp = false); 253 void len2(Float4 &dst, const Vector4f &src, bool pp = false); 254 void len3(Float4 &dst, const Vector4f &src, bool pp = false); 255 void len4(Float4 &dst, const Vector4f &src, bool pp = false); 256 void dist1(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 257 void dist2(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 258 void dist3(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 259 void dist4(Float4 &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 260 void dp1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 261 void dp2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 262 void dp2add(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 263 void dp3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 264 void dp4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 265 void det2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 266 void det3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 267 void det4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2, const Vector4f &src3); 268 void min(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 269 void imin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 270 void umin(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 271 void max(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 272 void imax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 273 void umax(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 274 void slt(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 275 void step(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 276 void exp2x(Vector4f &dst, const Vector4f &src, bool pp = false); 277 void exp2(Vector4f &dst, const Vector4f &src, bool pp = false); 278 void exp(Vector4f &dst, const Vector4f &src, bool pp = false); 279 void log2x(Vector4f &dst, const Vector4f &src, bool pp = false); 280 void log2(Vector4f &dst, const Vector4f &src, bool pp = false); 281 void log(Vector4f &dst, const Vector4f &src, bool pp = false); 282 void lit(Vector4f &dst, const Vector4f &src); 283 void att(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 284 void lrp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 285 void smooth(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 286 void packHalf2x16(Vector4f &dst, const Vector4f &src); 287 void unpackHalf2x16(Vector4f &dst, const Vector4f &src); 288 void packSnorm2x16(Vector4f &dst, const Vector4f &src); 289 void packUnorm2x16(Vector4f &dst, const Vector4f &src); 290 void unpackSnorm2x16(Vector4f &dst, const Vector4f &src); 291 void unpackUnorm2x16(Vector4f &dst, const Vector4f &src); 292 void frc(Vector4f &dst, const Vector4f &src); 293 void trunc(Vector4f &dst, const Vector4f &src); 294 void floor(Vector4f &dst, const Vector4f &src); 295 void round(Vector4f &dst, const Vector4f &src); 296 void roundEven(Vector4f &dst, const Vector4f &src); 297 void ceil(Vector4f &dst, const Vector4f &src); 298 void powx(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 299 void pow(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 300 void crs(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 301 void forward1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 302 void forward2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 303 void forward3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 304 void forward4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 305 void reflect1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 306 void reflect2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 307 void reflect3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 308 void reflect4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 309 void refract1(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 310 void refract2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 311 void refract3(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 312 void refract4(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Float4 &src2); 313 void sgn(Vector4f &dst, const Vector4f &src); 314 void isgn(Vector4f &dst, const Vector4f &src); 315 void abs(Vector4f &dst, const Vector4f &src); 316 void iabs(Vector4f &dst, const Vector4f &src); 317 void nrm2(Vector4f &dst, const Vector4f &src, bool pp = false); 318 void nrm3(Vector4f &dst, const Vector4f &src, bool pp = false); 319 void nrm4(Vector4f &dst, const Vector4f &src, bool pp = false); 320 void sincos(Vector4f &dst, const Vector4f &src, bool pp = false); 321 void cos(Vector4f &dst, const Vector4f &src, bool pp = false); 322 void sin(Vector4f &dst, const Vector4f &src, bool pp = false); 323 void tan(Vector4f &dst, const Vector4f &src, bool pp = false); 324 void acos(Vector4f &dst, const Vector4f &src, bool pp = false); 325 void asin(Vector4f &dst, const Vector4f &src, bool pp = false); 326 void atan(Vector4f &dst, const Vector4f &src, bool pp = false); 327 void atan2(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, bool pp = false); 328 void cosh(Vector4f &dst, const Vector4f &src, bool pp = false); 329 void sinh(Vector4f &dst, const Vector4f &src, bool pp = false); 330 void tanh(Vector4f &dst, const Vector4f &src, bool pp = false); 331 void acosh(Vector4f &dst, const Vector4f &src, bool pp = false); 332 void asinh(Vector4f &dst, const Vector4f &src, bool pp = false); 333 void atanh(Vector4f &dst, const Vector4f &src, bool pp = false); 334 void expp(Vector4f &dst, const Vector4f &src, unsigned short version); 335 void logp(Vector4f &dst, const Vector4f &src, unsigned short version); 336 void cmp0(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 337 void cmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 338 void icmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 339 void ucmp(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, Control control); 340 void select(Vector4f &dst, const Vector4f &src0, const Vector4f &src1, const Vector4f &src2); 341 void extract(Float4 &dst, const Vector4f &src0, const Float4 &src1); 342 void insert(Vector4f &dst, const Vector4f &src, const Float4 &element, const Float4 &index); 343 void all(Float4 &dst, const Vector4f &src); 344 void any(Float4 &dst, const Vector4f &src); 345 void bitwise_not(Vector4f &dst, const Vector4f &src); 346 void bitwise_or(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 347 void bitwise_xor(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 348 void bitwise_and(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 349 void equal(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 350 void notEqual(Vector4f &dst, const Vector4f &src0, const Vector4f &src1); 351 352 private: 353 void sgn(Float4 &dst, const Float4 &src); 354 void isgn(Float4 &dst, const Float4 &src); 355 void cmp0(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2); 356 void cmp0i(Float4 &dst, const Float4 &src0, const Float4 &src1, const Float4 &src2); 357 void select(Float4 &dst, RValue<Int4> src0, const Float4 &src1, const Float4 &src2); 358 void floatToHalfBits(Float4& dst, const Float4& floatBits, bool storeInUpperBits); 359 void halfToFloatBits(Float4& dst, const Float4& halfBits); 360 }; 361 } 362 363 #endif // sw_ShaderCore_hpp 364