// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Surface.hpp"

#include "Color.hpp"
#include "Context.hpp"
#include "ETC_Decoder.hpp"
#include "Renderer.hpp"
#include "Common/Half.hpp"
#include "Common/Memory.hpp"
#include "Common/CPUID.hpp"
#include "Common/Resource.hpp"
#include "Common/Debug.hpp"
#include "Reactor/Reactor.hpp"

#if defined(__i386__) || defined(__x86_64__)
#include <xmmintrin.h>
#include <emmintrin.h>
#endif

// Some platform headers define min/max macros that break std-style code below.
#undef min
#undef max

namespace sw
{
	extern bool quadLayoutEnabled;
	extern bool complementaryDepthBuffer;
	extern TranscendentalPrecision logPrecision;

	// Shared palette for P8/A8P8 paletted formats; set externally before reads.
	unsigned int *Surface::palette = 0;
	unsigned int Surface::paletteID = 0;

	// Clamps this rectangle's corners into the inclusive bounds
	// [minX, maxX] x [minY, maxY].
	void Rect::clip(int minX, int minY, int maxX, int maxY)
	{
		x0 = clamp(x0, minX, maxX);
		y0 = clamp(y0, minY, maxY);
		x1 = clamp(x1, minX, maxX);
		y1 = clamp(y1, minY, maxY);
	}

	// Writes one texel of a 3D buffer at (x, y, z), converting the float
	// color to this buffer's storage format.
	void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
	{
		void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;

		write(element, color);
	}

	// Writes one texel of a 2D buffer at (x, y).
	void Surface::Buffer::write(int x, int y, const Color<float> &color)
	{
		void *element = (unsigned
char*)buffer + x * bytes + y * pitchB;

		write(element, color);
	}

	// Converts a float color to this buffer's storage format and stores it at
	// the given element address. The (x, y[, z]) write() wrappers resolve the
	// address and delegate here. unorm/snorm/ucast/scast<n> quantize a float
	// component to an n-bit unsigned-normalized / signed-normalized /
	// unsigned-integer / signed-integer field respectively.
	inline void Surface::Buffer::write(void *element, const Color<float> &color)
	{
		switch(format)
		{
		case FORMAT_A8:
			*(unsigned char*)element = unorm<8>(color.a);
			break;
		case FORMAT_R8I_SNORM:
			*(char*)element = snorm<8>(color.r);
			break;
		case FORMAT_R8:
			*(unsigned char*)element = unorm<8>(color.r);
			break;
		case FORMAT_R8I:
			*(char*)element = scast<8>(color.r);
			break;
		case FORMAT_R8UI:
			*(unsigned char*)element = ucast<8>(color.r);
			break;
		case FORMAT_R16I:
			*(short*)element = scast<16>(color.r);
			break;
		case FORMAT_R16UI:
			*(unsigned short*)element = ucast<16>(color.r);
			break;
		case FORMAT_R32I:
			*(int*)element = static_cast<int>(color.r);
			break;
		case FORMAT_R32UI:
			*(unsigned int*)element = static_cast<unsigned int>(color.r);
			break;
		case FORMAT_R3G3B2:
			*(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
			break;
		case FORMAT_A8R3G3B2:
			*(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0);
			break;
		case FORMAT_X4R4G4B4:
			// Unused X nibble is forced to all-ones.
			*(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
			break;
		case FORMAT_A4R4G4B4:
			*(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0);
			break;
		case FORMAT_R4G4B4A4:
			*(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0);
			break;
		case FORMAT_R5G6B5:
			*(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0);
			break;
		case FORMAT_A1R5G5B5:
			*(unsigned short*)element =
(unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 118 break; 119 case FORMAT_R5G5B5A1: 120 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0); 121 break; 122 case FORMAT_X1R5G5B5: 123 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 124 break; 125 case FORMAT_A8R8G8B8: 126 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 127 break; 128 case FORMAT_X8R8G8B8: 129 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 130 break; 131 case FORMAT_A8B8G8R8I_SNORM: 132 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) | 133 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) | 134 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) | 135 (static_cast<unsigned int>(snorm<8>(color.r)) << 0); 136 break; 137 case FORMAT_A8B8G8R8: 138 case FORMAT_SRGB8_A8: 139 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 140 break; 141 case FORMAT_A8B8G8R8I: 142 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) | 143 (static_cast<unsigned int>(scast<8>(color.b)) << 16) | 144 (static_cast<unsigned int>(scast<8>(color.g)) << 8) | 145 (static_cast<unsigned int>(scast<8>(color.r)) << 0); 146 break; 147 case FORMAT_A8B8G8R8UI: 148 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 149 break; 150 case FORMAT_X8B8G8R8I_SNORM: 151 *(unsigned int*)element = 0x7F000000 | 152 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) | 153 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) | 154 (static_cast<unsigned 
int>(snorm<8>(color.r)) << 0); 155 break; 156 case FORMAT_X8B8G8R8: 157 case FORMAT_SRGB8_X8: 158 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 159 break; 160 case FORMAT_X8B8G8R8I: 161 *(unsigned int*)element = 0x7F000000 | 162 (static_cast<unsigned int>(scast<8>(color.b)) << 16) | 163 (static_cast<unsigned int>(scast<8>(color.g)) << 8) | 164 (static_cast<unsigned int>(scast<8>(color.r)) << 0); 165 case FORMAT_X8B8G8R8UI: 166 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 167 break; 168 case FORMAT_A2R10G10B10: 169 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0); 170 break; 171 case FORMAT_A2B10G10R10: 172 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0); 173 break; 174 case FORMAT_G8R8I_SNORM: 175 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) | 176 (static_cast<unsigned short>(snorm<8>(color.r)) << 0); 177 break; 178 case FORMAT_G8R8: 179 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 180 break; 181 case FORMAT_G8R8I: 182 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) | 183 (static_cast<unsigned short>(scast<8>(color.r)) << 0); 184 break; 185 case FORMAT_G8R8UI: 186 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 187 break; 188 case FORMAT_G16R16: 189 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0); 190 break; 191 case FORMAT_G16R16I: 192 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) | 193 (static_cast<unsigned int>(scast<16>(color.r)) << 0); 194 break; 195 case FORMAT_G16R16UI: 196 *(unsigned int*)element = (ucast<16>(color.g) << 16) | 
(ucast<16>(color.r) << 0); 197 break; 198 case FORMAT_G32R32I: 199 case FORMAT_G32R32UI: 200 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 201 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 202 break; 203 case FORMAT_A16B16G16R16: 204 ((unsigned short*)element)[0] = unorm<16>(color.r); 205 ((unsigned short*)element)[1] = unorm<16>(color.g); 206 ((unsigned short*)element)[2] = unorm<16>(color.b); 207 ((unsigned short*)element)[3] = unorm<16>(color.a); 208 break; 209 case FORMAT_A16B16G16R16I: 210 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r)); 211 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g)); 212 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b)); 213 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a)); 214 break; 215 case FORMAT_A16B16G16R16UI: 216 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r)); 217 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g)); 218 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b)); 219 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a)); 220 break; 221 case FORMAT_X16B16G16R16I: 222 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r)); 223 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g)); 224 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b)); 225 break; 226 case FORMAT_X16B16G16R16UI: 227 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r)); 228 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g)); 229 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b)); 230 break; 231 case FORMAT_A32B32G32R32I: 232 case FORMAT_A32B32G32R32UI: 233 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 
234 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 235 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b); 236 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a); 237 break; 238 case FORMAT_X32B32G32R32I: 239 case FORMAT_X32B32G32R32UI: 240 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 241 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 242 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b); 243 break; 244 case FORMAT_V8U8: 245 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 246 break; 247 case FORMAT_L6V5U5: 248 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0); 249 break; 250 case FORMAT_Q8W8V8U8: 251 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 252 break; 253 case FORMAT_X8L8V8U8: 254 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 255 break; 256 case FORMAT_V16U16: 257 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0); 258 break; 259 case FORMAT_A2W10V10U10: 260 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0); 261 break; 262 case FORMAT_A16W16V16U16: 263 ((unsigned short*)element)[0] = snorm<16>(color.r); 264 ((unsigned short*)element)[1] = snorm<16>(color.g); 265 ((unsigned short*)element)[2] = snorm<16>(color.b); 266 ((unsigned short*)element)[3] = unorm<16>(color.a); 267 break; 268 case FORMAT_Q16W16V16U16: 269 ((unsigned short*)element)[0] = snorm<16>(color.r); 270 ((unsigned short*)element)[1] = snorm<16>(color.g); 271 ((unsigned short*)element)[2] = snorm<16>(color.b); 272 ((unsigned short*)element)[3] = snorm<16>(color.a); 273 break; 274 case FORMAT_R8G8B8: 275 ((unsigned char*)element)[0] 
= unorm<8>(color.b); 276 ((unsigned char*)element)[1] = unorm<8>(color.g); 277 ((unsigned char*)element)[2] = unorm<8>(color.r); 278 break; 279 case FORMAT_B8G8R8: 280 ((unsigned char*)element)[0] = unorm<8>(color.r); 281 ((unsigned char*)element)[1] = unorm<8>(color.g); 282 ((unsigned char*)element)[2] = unorm<8>(color.b); 283 break; 284 case FORMAT_R16F: 285 *(half*)element = (half)color.r; 286 break; 287 case FORMAT_A16F: 288 *(half*)element = (half)color.a; 289 break; 290 case FORMAT_G16R16F: 291 ((half*)element)[0] = (half)color.r; 292 ((half*)element)[1] = (half)color.g; 293 break; 294 case FORMAT_B16G16R16F: 295 ((half*)element)[0] = (half)color.r; 296 ((half*)element)[1] = (half)color.g; 297 ((half*)element)[2] = (half)color.b; 298 break; 299 case FORMAT_A16B16G16R16F: 300 ((half*)element)[0] = (half)color.r; 301 ((half*)element)[1] = (half)color.g; 302 ((half*)element)[2] = (half)color.b; 303 ((half*)element)[3] = (half)color.a; 304 break; 305 case FORMAT_A32F: 306 *(float*)element = color.a; 307 break; 308 case FORMAT_R32F: 309 *(float*)element = color.r; 310 break; 311 case FORMAT_G32R32F: 312 ((float*)element)[0] = color.r; 313 ((float*)element)[1] = color.g; 314 break; 315 case FORMAT_X32B32G32R32F: 316 ((float*)element)[3] = 1.0f; 317 case FORMAT_B32G32R32F: 318 ((float*)element)[0] = color.r; 319 ((float*)element)[1] = color.g; 320 ((float*)element)[2] = color.b; 321 break; 322 case FORMAT_A32B32G32R32F: 323 ((float*)element)[0] = color.r; 324 ((float*)element)[1] = color.g; 325 ((float*)element)[2] = color.b; 326 ((float*)element)[3] = color.a; 327 break; 328 case FORMAT_D32F: 329 case FORMAT_D32F_LOCKABLE: 330 case FORMAT_D32FS8_TEXTURE: 331 case FORMAT_D32FS8_SHADOW: 332 *((float*)element) = color.r; 333 break; 334 case FORMAT_D32F_COMPLEMENTARY: 335 *((float*)element) = 1 - color.r; 336 break; 337 case FORMAT_S8: 338 *((unsigned char*)element) = unorm<8>(color.r); 339 break; 340 case FORMAT_L8: 341 *(unsigned char*)element = unorm<8>(color.r); 
342 break; 343 case FORMAT_A4L4: 344 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0); 345 break; 346 case FORMAT_L16: 347 *(unsigned short*)element = unorm<16>(color.r); 348 break; 349 case FORMAT_A8L8: 350 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0); 351 break; 352 case FORMAT_L16F: 353 *(half*)element = (half)color.r; 354 break; 355 case FORMAT_A16L16F: 356 ((half*)element)[0] = (half)color.r; 357 ((half*)element)[1] = (half)color.a; 358 break; 359 case FORMAT_L32F: 360 *(float*)element = color.r; 361 break; 362 case FORMAT_A32L32F: 363 ((float*)element)[0] = color.r; 364 ((float*)element)[1] = color.a; 365 break; 366 default: 367 ASSERT(false); 368 } 369 } 370 read(int x,int y,int z) const371 Color<float> Surface::Buffer::read(int x, int y, int z) const 372 { 373 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 374 375 return read(element); 376 } 377 read(int x,int y) const378 Color<float> Surface::Buffer::read(int x, int y) const 379 { 380 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 381 382 return read(element); 383 } 384 read(void * element) const385 inline Color<float> Surface::Buffer::read(void *element) const 386 { 387 float r = 0.0f; 388 float g = 0.0f; 389 float b = 0.0f; 390 float a = 1.0f; 391 392 switch(format) 393 { 394 case FORMAT_P8: 395 { 396 ASSERT(palette); 397 398 unsigned int abgr = palette[*(unsigned char*)element]; 399 400 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 401 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 402 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 403 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 404 } 405 break; 406 case FORMAT_A8P8: 407 { 408 ASSERT(palette); 409 410 unsigned int bgr = palette[((unsigned char*)element)[0]]; 411 412 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF); 413 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00); 414 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000); 415 a = ((unsigned 
char*)element)[1] * (1.0f / 0xFF); 416 } 417 break; 418 case FORMAT_A8: 419 r = 0; 420 g = 0; 421 b = 0; 422 a = *(unsigned char*)element * (1.0f / 0xFF); 423 break; 424 case FORMAT_R8I_SNORM: 425 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f); 426 break; 427 case FORMAT_R8: 428 r = *(unsigned char*)element * (1.0f / 0xFF); 429 break; 430 case FORMAT_R8I: 431 r = *(signed char*)element; 432 break; 433 case FORMAT_R8UI: 434 r = *(unsigned char*)element; 435 break; 436 case FORMAT_R3G3B2: 437 { 438 unsigned char rgb = *(unsigned char*)element; 439 440 r = (rgb & 0xE0) * (1.0f / 0xE0); 441 g = (rgb & 0x1C) * (1.0f / 0x1C); 442 b = (rgb & 0x03) * (1.0f / 0x03); 443 } 444 break; 445 case FORMAT_A8R3G3B2: 446 { 447 unsigned short argb = *(unsigned short*)element; 448 449 a = (argb & 0xFF00) * (1.0f / 0xFF00); 450 r = (argb & 0x00E0) * (1.0f / 0x00E0); 451 g = (argb & 0x001C) * (1.0f / 0x001C); 452 b = (argb & 0x0003) * (1.0f / 0x0003); 453 } 454 break; 455 case FORMAT_X4R4G4B4: 456 { 457 unsigned short rgb = *(unsigned short*)element; 458 459 r = (rgb & 0x0F00) * (1.0f / 0x0F00); 460 g = (rgb & 0x00F0) * (1.0f / 0x00F0); 461 b = (rgb & 0x000F) * (1.0f / 0x000F); 462 } 463 break; 464 case FORMAT_A4R4G4B4: 465 { 466 unsigned short argb = *(unsigned short*)element; 467 468 a = (argb & 0xF000) * (1.0f / 0xF000); 469 r = (argb & 0x0F00) * (1.0f / 0x0F00); 470 g = (argb & 0x00F0) * (1.0f / 0x00F0); 471 b = (argb & 0x000F) * (1.0f / 0x000F); 472 } 473 break; 474 case FORMAT_R4G4B4A4: 475 { 476 unsigned short rgba = *(unsigned short*)element; 477 478 r = (rgba & 0xF000) * (1.0f / 0xF000); 479 g = (rgba & 0x0F00) * (1.0f / 0x0F00); 480 b = (rgba & 0x00F0) * (1.0f / 0x00F0); 481 a = (rgba & 0x000F) * (1.0f / 0x000F); 482 } 483 break; 484 case FORMAT_R5G6B5: 485 { 486 unsigned short rgb = *(unsigned short*)element; 487 488 r = (rgb & 0xF800) * (1.0f / 0xF800); 489 g = (rgb & 0x07E0) * (1.0f / 0x07E0); 490 b = (rgb & 0x001F) * (1.0f / 0x001F); 491 } 492 break; 493 case 
FORMAT_A1R5G5B5: 494 { 495 unsigned short argb = *(unsigned short*)element; 496 497 a = (argb & 0x8000) * (1.0f / 0x8000); 498 r = (argb & 0x7C00) * (1.0f / 0x7C00); 499 g = (argb & 0x03E0) * (1.0f / 0x03E0); 500 b = (argb & 0x001F) * (1.0f / 0x001F); 501 } 502 break; 503 case FORMAT_R5G5B5A1: 504 { 505 unsigned short rgba = *(unsigned short*)element; 506 507 r = (rgba & 0xF800) * (1.0f / 0xF800); 508 g = (rgba & 0x07C0) * (1.0f / 0x07C0); 509 b = (rgba & 0x003E) * (1.0f / 0x003E); 510 a = (rgba & 0x0001) * (1.0f / 0x0001); 511 } 512 break; 513 case FORMAT_X1R5G5B5: 514 { 515 unsigned short xrgb = *(unsigned short*)element; 516 517 r = (xrgb & 0x7C00) * (1.0f / 0x7C00); 518 g = (xrgb & 0x03E0) * (1.0f / 0x03E0); 519 b = (xrgb & 0x001F) * (1.0f / 0x001F); 520 } 521 break; 522 case FORMAT_A8R8G8B8: 523 { 524 unsigned int argb = *(unsigned int*)element; 525 526 a = (argb & 0xFF000000) * (1.0f / 0xFF000000); 527 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000); 528 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00); 529 b = (argb & 0x000000FF) * (1.0f / 0x000000FF); 530 } 531 break; 532 case FORMAT_X8R8G8B8: 533 { 534 unsigned int xrgb = *(unsigned int*)element; 535 536 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000); 537 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00); 538 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF); 539 } 540 break; 541 case FORMAT_A8B8G8R8I_SNORM: 542 { 543 signed char* abgr = (signed char*)element; 544 545 r = max(abgr[0] * (1.0f / 0x7F), -1.0f); 546 g = max(abgr[1] * (1.0f / 0x7F), -1.0f); 547 b = max(abgr[2] * (1.0f / 0x7F), -1.0f); 548 a = max(abgr[3] * (1.0f / 0x7F), -1.0f); 549 } 550 break; 551 case FORMAT_A8B8G8R8: 552 case FORMAT_SRGB8_A8: 553 { 554 unsigned int abgr = *(unsigned int*)element; 555 556 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 557 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 558 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 559 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 560 } 561 break; 562 case FORMAT_A8B8G8R8I: 563 { 564 
signed char* abgr = (signed char*)element; 565 566 r = abgr[0]; 567 g = abgr[1]; 568 b = abgr[2]; 569 a = abgr[3]; 570 } 571 break; 572 case FORMAT_A8B8G8R8UI: 573 { 574 unsigned char* abgr = (unsigned char*)element; 575 576 r = abgr[0]; 577 g = abgr[1]; 578 b = abgr[2]; 579 a = abgr[3]; 580 } 581 break; 582 case FORMAT_X8B8G8R8I_SNORM: 583 { 584 signed char* bgr = (signed char*)element; 585 586 r = max(bgr[0] * (1.0f / 0x7F), -1.0f); 587 g = max(bgr[1] * (1.0f / 0x7F), -1.0f); 588 b = max(bgr[2] * (1.0f / 0x7F), -1.0f); 589 } 590 break; 591 case FORMAT_X8B8G8R8: 592 case FORMAT_SRGB8_X8: 593 { 594 unsigned int xbgr = *(unsigned int*)element; 595 596 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000); 597 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00); 598 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF); 599 } 600 break; 601 case FORMAT_X8B8G8R8I: 602 { 603 signed char* bgr = (signed char*)element; 604 605 r = bgr[0]; 606 g = bgr[1]; 607 b = bgr[2]; 608 } 609 break; 610 case FORMAT_X8B8G8R8UI: 611 { 612 unsigned char* bgr = (unsigned char*)element; 613 614 r = bgr[0]; 615 g = bgr[1]; 616 b = bgr[2]; 617 } 618 break; 619 case FORMAT_G8R8I_SNORM: 620 { 621 signed char* gr = (signed char*)element; 622 623 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00); 624 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF); 625 } 626 break; 627 case FORMAT_G8R8: 628 { 629 unsigned short gr = *(unsigned short*)element; 630 631 g = (gr & 0xFF00) * (1.0f / 0xFF00); 632 r = (gr & 0x00FF) * (1.0f / 0x00FF); 633 } 634 break; 635 case FORMAT_G8R8I: 636 { 637 signed char* gr = (signed char*)element; 638 639 r = gr[0]; 640 g = gr[1]; 641 } 642 break; 643 case FORMAT_G8R8UI: 644 { 645 unsigned char* gr = (unsigned char*)element; 646 647 r = gr[0]; 648 g = gr[1]; 649 } 650 break; 651 case FORMAT_R16I: 652 r = *((short*)element); 653 break; 654 case FORMAT_R16UI: 655 r = *((unsigned short*)element); 656 break; 657 case FORMAT_G16R16I: 658 { 659 short* gr = (short*)element; 660 661 r = gr[0]; 662 g = gr[1]; 663 } 664 break; 
665 case FORMAT_G16R16: 666 { 667 unsigned int gr = *(unsigned int*)element; 668 669 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000); 670 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF); 671 } 672 break; 673 case FORMAT_G16R16UI: 674 { 675 unsigned short* gr = (unsigned short*)element; 676 677 r = gr[0]; 678 g = gr[1]; 679 } 680 break; 681 case FORMAT_A2R10G10B10: 682 { 683 unsigned int argb = *(unsigned int*)element; 684 685 a = (argb & 0xC0000000) * (1.0f / 0xC0000000); 686 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000); 687 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00); 688 b = (argb & 0x000003FF) * (1.0f / 0x000003FF); 689 } 690 break; 691 case FORMAT_A2B10G10R10: 692 { 693 unsigned int abgr = *(unsigned int*)element; 694 695 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000); 696 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000); 697 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00); 698 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF); 699 } 700 break; 701 case FORMAT_A16B16G16R16I: 702 { 703 short* abgr = (short*)element; 704 705 r = abgr[0]; 706 g = abgr[1]; 707 b = abgr[2]; 708 a = abgr[3]; 709 } 710 break; 711 case FORMAT_A16B16G16R16: 712 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF); 713 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF); 714 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF); 715 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 716 break; 717 case FORMAT_A16B16G16R16UI: 718 { 719 unsigned short* abgr = (unsigned short*)element; 720 721 r = abgr[0]; 722 g = abgr[1]; 723 b = abgr[2]; 724 a = abgr[3]; 725 } 726 break; 727 case FORMAT_X16B16G16R16I: 728 { 729 short* bgr = (short*)element; 730 731 r = bgr[0]; 732 g = bgr[1]; 733 b = bgr[2]; 734 } 735 break; 736 case FORMAT_X16B16G16R16UI: 737 { 738 unsigned short* bgr = (unsigned short*)element; 739 740 r = bgr[0]; 741 g = bgr[1]; 742 b = bgr[2]; 743 } 744 break; 745 case FORMAT_A32B32G32R32I: 746 { 747 int* abgr = (int*)element; 748 749 r = static_cast<float>(abgr[0]); 750 g = 
static_cast<float>(abgr[1]); 751 b = static_cast<float>(abgr[2]); 752 a = static_cast<float>(abgr[3]); 753 } 754 break; 755 case FORMAT_A32B32G32R32UI: 756 { 757 unsigned int* abgr = (unsigned int*)element; 758 759 r = static_cast<float>(abgr[0]); 760 g = static_cast<float>(abgr[1]); 761 b = static_cast<float>(abgr[2]); 762 a = static_cast<float>(abgr[3]); 763 } 764 break; 765 case FORMAT_X32B32G32R32I: 766 { 767 int* bgr = (int*)element; 768 769 r = static_cast<float>(bgr[0]); 770 g = static_cast<float>(bgr[1]); 771 b = static_cast<float>(bgr[2]); 772 } 773 break; 774 case FORMAT_X32B32G32R32UI: 775 { 776 unsigned int* bgr = (unsigned int*)element; 777 778 r = static_cast<float>(bgr[0]); 779 g = static_cast<float>(bgr[1]); 780 b = static_cast<float>(bgr[2]); 781 } 782 break; 783 case FORMAT_G32R32I: 784 { 785 int* gr = (int*)element; 786 787 r = static_cast<float>(gr[0]); 788 g = static_cast<float>(gr[1]); 789 } 790 break; 791 case FORMAT_G32R32UI: 792 { 793 unsigned int* gr = (unsigned int*)element; 794 795 r = static_cast<float>(gr[0]); 796 g = static_cast<float>(gr[1]); 797 } 798 break; 799 case FORMAT_R32I: 800 r = static_cast<float>(*((int*)element)); 801 break; 802 case FORMAT_R32UI: 803 r = static_cast<float>(*((unsigned int*)element)); 804 break; 805 case FORMAT_V8U8: 806 { 807 unsigned short vu = *(unsigned short*)element; 808 809 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000); 810 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000); 811 } 812 break; 813 case FORMAT_L6V5U5: 814 { 815 unsigned short lvu = *(unsigned short*)element; 816 817 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000); 818 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000); 819 b = (lvu & 0xFC00) * (1.0f / 0xFC00); 820 } 821 break; 822 case FORMAT_Q8W8V8U8: 823 { 824 unsigned int qwvu = *(unsigned int*)element; 825 826 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 827 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 828 b = ((int)(qwvu & 
0x00FF0000) << 8) * (1.0f / 0x7F000000); 829 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000); 830 } 831 break; 832 case FORMAT_X8L8V8U8: 833 { 834 unsigned int xlvu = *(unsigned int*)element; 835 836 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 837 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 838 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000); 839 } 840 break; 841 case FORMAT_R8G8B8: 842 r = ((unsigned char*)element)[2] * (1.0f / 0xFF); 843 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 844 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 845 break; 846 case FORMAT_B8G8R8: 847 r = ((unsigned char*)element)[0] * (1.0f / 0xFF); 848 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 849 b = ((unsigned char*)element)[2] * (1.0f / 0xFF); 850 break; 851 case FORMAT_V16U16: 852 { 853 unsigned int vu = *(unsigned int*)element; 854 855 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000); 856 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000); 857 } 858 break; 859 case FORMAT_A2W10V10U10: 860 { 861 unsigned int awvu = *(unsigned int*)element; 862 863 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000); 864 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000); 865 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000); 866 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000); 867 } 868 break; 869 case FORMAT_A16W16V16U16: 870 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 871 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 872 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 873 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 874 break; 875 case FORMAT_Q16W16V16U16: 876 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 877 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 878 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 879 a = ((signed short*)element)[3] * (1.0f / 0x7FFF); 880 break; 881 case FORMAT_L8: 882 r = 883 g = 884 b = *(unsigned char*)element * (1.0f / 0xFF); 885 break; 886 case 
FORMAT_A4L4: 887 { 888 unsigned char al = *(unsigned char*)element; 889 890 r = 891 g = 892 b = (al & 0x0F) * (1.0f / 0x0F); 893 a = (al & 0xF0) * (1.0f / 0xF0); 894 } 895 break; 896 case FORMAT_L16: 897 r = 898 g = 899 b = *(unsigned short*)element * (1.0f / 0xFFFF); 900 break; 901 case FORMAT_A8L8: 902 r = 903 g = 904 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 905 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 906 break; 907 case FORMAT_L16F: 908 r = 909 g = 910 b = *(half*)element; 911 break; 912 case FORMAT_A16L16F: 913 r = 914 g = 915 b = ((half*)element)[0]; 916 a = ((half*)element)[1]; 917 break; 918 case FORMAT_L32F: 919 r = 920 g = 921 b = *(float*)element; 922 break; 923 case FORMAT_A32L32F: 924 r = 925 g = 926 b = ((float*)element)[0]; 927 a = ((float*)element)[1]; 928 break; 929 case FORMAT_A16F: 930 a = *(half*)element; 931 break; 932 case FORMAT_R16F: 933 r = *(half*)element; 934 break; 935 case FORMAT_G16R16F: 936 r = ((half*)element)[0]; 937 g = ((half*)element)[1]; 938 break; 939 case FORMAT_B16G16R16F: 940 r = ((half*)element)[0]; 941 g = ((half*)element)[1]; 942 b = ((half*)element)[2]; 943 break; 944 case FORMAT_A16B16G16R16F: 945 r = ((half*)element)[0]; 946 g = ((half*)element)[1]; 947 b = ((half*)element)[2]; 948 a = ((half*)element)[3]; 949 break; 950 case FORMAT_A32F: 951 a = *(float*)element; 952 break; 953 case FORMAT_R32F: 954 r = *(float*)element; 955 break; 956 case FORMAT_G32R32F: 957 r = ((float*)element)[0]; 958 g = ((float*)element)[1]; 959 break; 960 case FORMAT_X32B32G32R32F: 961 case FORMAT_B32G32R32F: 962 r = ((float*)element)[0]; 963 g = ((float*)element)[1]; 964 b = ((float*)element)[2]; 965 break; 966 case FORMAT_A32B32G32R32F: 967 r = ((float*)element)[0]; 968 g = ((float*)element)[1]; 969 b = ((float*)element)[2]; 970 a = ((float*)element)[3]; 971 break; 972 case FORMAT_D32F: 973 case FORMAT_D32F_LOCKABLE: 974 case FORMAT_D32FS8_TEXTURE: 975 case FORMAT_D32FS8_SHADOW: 976 r = *(float*)element; 977 g = r; 978 b = 
r;
			a = r;
			break;
		case FORMAT_D32F_COMPLEMENTARY:
			// Complementary depth stores 1 - z; undo it on read.
			r = 1.0f - *(float*)element;
			g = r;
			b = r;
			a = r;
			break;
		case FORMAT_S8:
			r = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		default:
			ASSERT(false);
		}

	//	if(sRGB)
	//	{
	//		r = sRGBtoLinear(r);
	//		g = sRGBtoLinear(g);
	//		b = sRGBtoLinear(b);
	//	}

		return Color<float>(r, g, b, a);
	}

	// Trilinear sample of a 3D buffer at unnormalized texel coordinates.
	// Coordinates are offset by half a texel so integer coordinates address
	// texel centers; border texels are clamped (no wrap).
	Color<float> Surface::Buffer::sample(float x, float y, float z) const
	{
		x -= 0.5f;
		y -= 0.5f;
		z -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		int z0 = clamp((int)z, 0, depth - 1);
		int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;

		// Eight neighboring texels of the sample point.
		Color<float> c000 = read(x0, y0, z0);
		Color<float> c100 = read(x1, y0, z0);
		Color<float> c010 = read(x0, y1, z0);
		Color<float> c110 = read(x1, y1, z0);
		Color<float> c001 = read(x0, y0, z1);
		Color<float> c101 = read(x1, y0, z1);
		Color<float> c011 = read(x0, y1, z1);
		Color<float> c111 = read(x1, y1, z1);

		// Fractional position within the texel cell.
		float fx = x - x0;
		float fy = y - y0;
		float fz = z - z0;

		c000 *= (1 - fx) * (1 - fy) * (1 - fz);
		c100 *= fx * (1 - fy) * (1 - fz);
		c010 *= (1 - fx) * fy * (1 - fz);
		c110 *= fx * fy * (1 - fz);
		c001 *= (1 - fx) * (1 - fy) * fz;
		c101 *= fx * (1 - fy) * fz;
		c011 *= (1 - fx) * fy * fz;
		c111 *= fx * fy * fz;

		return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
	}

	// Bilinear sample of a 2D buffer at unnormalized texel coordinates.
	Color<float> Surface::Buffer::sample(float x, float y) const
	{
		x -= 0.5f;
		y -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ?
x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		// Four neighboring texels and the fractional position between them.
		Color<float> c00 = read(x0, y0);
		Color<float> c10 = read(x1, y0);
		Color<float> c01 = read(x0, y1);
		Color<float> c11 = read(x1, y1);

		float fx = x - x0;
		float fy = y - y0;

		c00 *= (1 - fx) * (1 - fy);
		c10 *= fx * (1 - fy);
		c01 *= (1 - fx) * fy;
		c11 *= fx * fy;

		return c00 + c10 + c01 + c11;
	}

	// Marks the buffer locked and returns the address of the requested
	// element. For block-compressed formats (x, y) are texel coordinates,
	// converted here to the address of the containing compression block.
	void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
	{
		this->lock = lock;

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			// Any lock that permits writing marks the contents as modified.
			dirty = true;
			break;
		default:
			ASSERT(false);
		}

		if(buffer)
		{
			switch(format)
			{
			// 8 bytes per 4x4 block:
#if S3TC_SUPPORT
			case FORMAT_DXT1:
#endif
			case FORMAT_ATI1:
			case FORMAT_ETC1:
			case FORMAT_R11_EAC:
			case FORMAT_SIGNED_R11_EAC:
			case FORMAT_RGB8_ETC2:
			case FORMAT_SRGB8_ETC2:
			case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
				return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			// 16 bytes per block; divisors are the block footprint in texels:
			case FORMAT_RG11_EAC:
			case FORMAT_SIGNED_RG11_EAC:
			case FORMAT_RGBA8_ETC2_EAC:
			case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
			case FORMAT_RGBA_ASTC_4x4_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_5x4_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
				return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_5x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_6x5_KHR:
			case
FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1120 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB; 1121 case FORMAT_RGBA_ASTC_6x6_KHR: 1122 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1123 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB; 1124 case FORMAT_RGBA_ASTC_8x5_KHR: 1125 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1126 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB; 1127 case FORMAT_RGBA_ASTC_8x6_KHR: 1128 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1129 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB; 1130 case FORMAT_RGBA_ASTC_8x8_KHR: 1131 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1132 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB; 1133 case FORMAT_RGBA_ASTC_10x5_KHR: 1134 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1135 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB; 1136 case FORMAT_RGBA_ASTC_10x6_KHR: 1137 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1138 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB; 1139 case FORMAT_RGBA_ASTC_10x8_KHR: 1140 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1141 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB; 1142 case FORMAT_RGBA_ASTC_10x10_KHR: 1143 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1144 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB; 1145 case FORMAT_RGBA_ASTC_12x10_KHR: 1146 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1147 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB; 1148 case FORMAT_RGBA_ASTC_12x12_KHR: 1149 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 1150 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB; 1151 #if S3TC_SUPPORT 1152 case FORMAT_DXT3: 1153 case FORMAT_DXT5: 1154 #endif 1155 case FORMAT_ATI2: 1156 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1157 default: 1158 return (unsigned 
char*)buffer + x * bytes + y * pitchB + z * sliceB; 1159 } 1160 } 1161 1162 return 0; 1163 } 1164 unlockRect()1165 void Surface::Buffer::unlockRect() 1166 { 1167 lock = LOCK_UNLOCKED; 1168 } 1169 1170 class SurfaceImplementation : public Surface 1171 { 1172 public: SurfaceImplementation(int width,int height,int depth,Format format,void * pixels,int pitch,int slice)1173 SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) 1174 : Surface(width, height, depth, format, pixels, pitch, slice) {} SurfaceImplementation(Resource * texture,int width,int height,int depth,Format format,bool lockable,bool renderTarget,int pitchP=0)1175 SurfaceImplementation(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchP = 0) 1176 : Surface(texture, width, height, depth, format, lockable, renderTarget, pitchP) {} ~SurfaceImplementation()1177 ~SurfaceImplementation() override {}; 1178 lockInternal(int x,int y,int z,Lock lock,Accessor client)1179 void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override 1180 { 1181 return Surface::lockInternal(x, y, z, lock, client); 1182 } 1183 unlockInternal()1184 void unlockInternal() override 1185 { 1186 Surface::unlockInternal(); 1187 } 1188 }; 1189 create(int width,int height,int depth,Format format,void * pixels,int pitch,int slice)1190 Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) 1191 { 1192 return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice); 1193 } 1194 create(Resource * texture,int width,int height,int depth,Format format,bool lockable,bool renderTarget,int pitchPprovided)1195 Surface *Surface::create(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) 1196 { 1197 return new SurfaceImplementation(texture, width, height, depth, format, lockable, 
renderTarget, pitchPprovided);
    }

    // Constructs a surface around caller-owned pixel memory. The external
    // buffer is not owned (ownExternal = false); internal and stencil planes
    // are described here but allocated lazily on first lock.
    Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
    {
        resource = new Resource(0);
        hasParent = false;
        ownExternal = false;
        depth = max(1, depth);   // Treat 2D surfaces as depth 1.

        external.buffer = pixels;
        external.width = width;
        external.height = height;
        external.depth = depth;
        external.format = format;
        external.bytes = bytes(external.format);
        external.pitchB = pitch;
        external.pitchP = external.bytes ? pitch / external.bytes : 0;
        external.sliceB = slice;
        external.sliceP = external.bytes ? slice / external.bytes : 0;
        external.lock = LOCK_UNLOCKED;
        external.dirty = true;   // Caller's pixels are the authoritative data.

        internal.buffer = 0;
        internal.width = width;
        internal.height = height;
        internal.depth = depth;
        internal.format = selectInternalFormat(format);
        internal.bytes = bytes(internal.format);
        internal.pitchB = pitchB(internal.width, internal.format, false);
        internal.pitchP = pitchP(internal.width, internal.format, false);
        internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
        internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
        internal.lock = LOCK_UNLOCKED;
        internal.dirty = false;

        stencil.buffer = 0;
        stencil.width = width;
        stencil.height = height;
        stencil.depth = depth;
        stencil.format = FORMAT_S8;
        stencil.bytes = bytes(stencil.format);
        stencil.pitchB = pitchB(stencil.width, stencil.format, false);
        stencil.pitchP = pitchP(stencil.width, stencil.format, false);
        stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
        stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
        stencil.lock = LOCK_UNLOCKED;
        stencil.dirty = false;

        dirtyMipmaps = true;
        paletteUsed = 0;
    }

    // Constructs a surface whose buffers are all allocated lazily. When a
    // parent texture resource is given it is shared instead of creating a
    // private one. pitchPprovided (in pixels) overrides the computed internal
    // pitch when non-zero.
    Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
    {
        resource = texture ? texture : new Resource(0);
        hasParent = texture != 0;
        ownExternal = true;
        depth = max(1, depth);

        external.buffer = 0;
        external.width = width;
        external.height = height;
        external.depth = depth;
        external.format = format;
        external.bytes = bytes(external.format);
        external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
        external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
        external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
        external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
        external.lock = LOCK_UNLOCKED;
        external.dirty = false;

        internal.buffer = 0;
        internal.width = width;
        internal.height = height;
        internal.depth = depth;
        internal.format = selectInternalFormat(format);
        internal.bytes = bytes(internal.format);
        internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes;
        internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided;
        internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
        internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
        internal.lock = LOCK_UNLOCKED;
        internal.dirty = false;

        stencil.buffer = 0;
        stencil.width = width;
        stencil.height = height;
        stencil.depth = depth;
        stencil.format = FORMAT_S8;
        stencil.bytes = bytes(stencil.format);
        stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
        stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
        stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
        stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
        stencil.lock = LOCK_UNLOCKED;
        stencil.dirty = false;

        dirtyMipmaps = true;
        paletteUsed = 0;
    }

    Surface::~Surface()
    {
        // sync() must be called before this destructor to ensure all locks have been released.
        // We can't call it here because the parent resource may already have been destroyed.
        ASSERT(isUnlocked());

        if(!hasParent)
        {
            resource->destruct();
        }

        if(ownExternal)
        {
            deallocate(external.buffer);
        }

        // The internal buffer may alias the external one when formats match;
        // avoid a double free in that case.
        if(internal.buffer != external.buffer)
        {
            deallocate(internal.buffer);
        }

        deallocate(stencil.buffer);

        external.buffer = 0;
        internal.buffer = 0;
        stencil.buffer = 0;
    }

    // Locks the external (client-format) buffer, allocating it on first use
    // and refreshing it from the internal buffer if that copy is newer.
    void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
    {
        resource->lock(client);

        if(!external.buffer)
        {
            // Share storage with the internal buffer when the layouts match.
            if(internal.buffer && identicalFormats())
            {
                external.buffer = internal.buffer;
            }
            else
            {
                external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
            }
        }

        if(internal.dirty)
        {
            // No need to copy stale data when the caller will discard it.
            if(lock != LOCK_DISCARD)
            {
                update(external, internal);
            }

            internal.dirty = false;
        }

        switch(lock)
        {
        case LOCK_READONLY:
            break;
        case LOCK_WRITEONLY:
        case LOCK_READWRITE:
        case LOCK_DISCARD:
            dirtyMipmaps = true;
            break;
        default:
            ASSERT(false);
        }

        return external.lockRect(x, y, z, lock);
    }

    void Surface::unlockExternal()
    {
        external.unlockRect();

        resource->unlock();
    }

    // Locks the internal (device-format) buffer, allocating it on first use
    // and converting from the external buffer when it holds newer data.
    void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
    {
        if(lock != LOCK_UNLOCKED)
        {
            resource->lock(client);
        }

        if(!internal.buffer)
        {
            if(external.buffer && identicalFormats())
            {
                internal.buffer = external.buffer;
            }
            else
            {
                internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
            }
        }

        // FIXME: WHQL requires conversion to lower external precision and back
        if(logPrecision >= WHQL)
        {
            if(internal.dirty && renderTarget && internal.format != external.format)
            {
                if(lock != LOCK_DISCARD)
                {
                    // Round-trip through the lower-precision external format so
                    // results match what WHQL conformance expects.
                    switch(external.format)
                    {
                    case FORMAT_R3G3B2:
                    case FORMAT_A8R3G3B2:
                    case FORMAT_A1R5G5B5:
                    case FORMAT_A2R10G10B10:
                    case FORMAT_A2B10G10R10:
                        lockExternal(0, 0, 0, LOCK_READWRITE, client);
                        unlockExternal();
                        break;
                    default:
                        // Difference passes WHQL
                        break;
                    }
                }
            }
        }

        // Refresh from the external copy when it is newer, or when the shared
        // palette changed since this surface last decoded a paletted format.
        if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
        {
            if(lock != LOCK_DISCARD)
            {
                update(internal, external);
            }

            external.dirty = false;
            paletteUsed = Surface::paletteID;
        }

        switch(lock)
        {
        case LOCK_UNLOCKED:
        case LOCK_READONLY:
            break;
        case LOCK_WRITEONLY:
        case LOCK_READWRITE:
        case LOCK_DISCARD:
            dirtyMipmaps = true;
            break;
        default:
            ASSERT(false);
        }

        if(lock == LOCK_READONLY && client == PUBLIC)
        {
            resolve();
        }

        return internal.lockRect(x, y, z, lock);
    }

    void Surface::unlockInternal()
    {
        internal.unlockRect();

        resource->unlock();
    }

    // Locks the separate stencil plane, allocating it on first use.
    void *Surface::lockStencil(int x, int y, int front, Accessor client)
    {
        resource->lock(client);

        if(!stencil.buffer)
        {
            stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
        }

        return stencil.lockRect(x, y, front, LOCK_READWRITE);   // FIXME
    }

    void Surface::unlockStencil()
    {
        stencil.unlockRect();

        resource->unlock();
    }

    // Bytes per pixel for each format. For block-compressed formats the value
    // is per column of a block (see per-case comments); ASTC returns 0 (FIXME).
    int Surface::bytes(Format format)
    {
        switch(format)
        {
        case FORMAT_NULL:                return 0;
        case FORMAT_P8:                  return 1;
        case FORMAT_A8P8:                return 2;
        case FORMAT_A8:                  return 1;
        case FORMAT_R8I:                 return 1;
        case FORMAT_R8:                  return 1;
        case FORMAT_R3G3B2:              return 1;
        case FORMAT_R16I:                return 2;
        case FORMAT_R16UI:               return 2;
        case FORMAT_A8R3G3B2:            return 2;
        case FORMAT_R5G6B5:              return 2;
        case FORMAT_A1R5G5B5:            return 2;
        case FORMAT_X1R5G5B5:            return 2;
        case FORMAT_R5G5B5A1:            return 2;
        case FORMAT_X4R4G4B4:            return 2;
        case FORMAT_A4R4G4B4:            return 2;
        case FORMAT_R4G4B4A4:            return 2;
        case FORMAT_R8G8B8:              return 3;
        case FORMAT_B8G8R8:              return 3;
        case FORMAT_R32I:                return 4;
        case FORMAT_R32UI:               return 4;
        case FORMAT_X8R8G8B8:            return 4;
    //  case FORMAT_X8G8R8B8Q:           return 4;
        case FORMAT_A8R8G8B8:            return 4;
    //  case FORMAT_A8G8R8B8Q:           return 4;
        case FORMAT_X8B8G8R8I:           return 4;
        case FORMAT_X8B8G8R8:            return 4;
        case FORMAT_SRGB8_X8:            return 4;
        case FORMAT_SRGB8_A8:            return 4;
        case FORMAT_A8B8G8R8I:           return 4;
        case FORMAT_R8UI:                return 1;
        case FORMAT_G8R8UI:              return 2;
        case FORMAT_X8B8G8R8UI:          return 4;
        case FORMAT_A8B8G8R8UI:          return 4;
        case FORMAT_A8B8G8R8:            return 4;
        case FORMAT_R8I_SNORM:           return 1;
        case FORMAT_G8R8I_SNORM:         return 2;
        case FORMAT_X8B8G8R8I_SNORM:     return 4;
        case FORMAT_A8B8G8R8I_SNORM:     return 4;
        case FORMAT_A2R10G10B10:         return 4;
        case FORMAT_A2B10G10R10:         return 4;
        case FORMAT_G8R8I:               return 2;
        case FORMAT_G8R8:                return 2;
        case FORMAT_G16R16I:             return 4;
        case FORMAT_G16R16UI:            return 4;
        case FORMAT_G16R16:              return 4;
        case FORMAT_G32R32I:             return 8;
        case FORMAT_G32R32UI:            return 8;
        case FORMAT_X16B16G16R16I:       return 8;
        case FORMAT_X16B16G16R16UI:      return 8;
        case FORMAT_A16B16G16R16I:       return 8;
        case FORMAT_A16B16G16R16UI:      return 8;
        case FORMAT_A16B16G16R16:        return 8;
        case FORMAT_X32B32G32R32I:       return 16;
        case FORMAT_X32B32G32R32UI:      return 16;
        case FORMAT_A32B32G32R32I:       return 16;
        case FORMAT_A32B32G32R32UI:      return 16;
        // Compressed formats
#if S3TC_SUPPORT
        case FORMAT_DXT1:                return 2;   // Column of four pixels
        case FORMAT_DXT3:                return 4;   // Column of four pixels
        case FORMAT_DXT5:                return 4;   // Column of four pixels
#endif
        case FORMAT_ATI1:                return 2;   // Column of four pixels
        case FORMAT_ATI2:                return 4;   // Column of four pixels
        case FORMAT_ETC1:                return 2;   // Column of four pixels
        case FORMAT_R11_EAC:             return 2;
        case FORMAT_SIGNED_R11_EAC:      return 2;
        case FORMAT_RG11_EAC:            return 4;
        case FORMAT_SIGNED_RG11_EAC:     return 4;
        case FORMAT_RGB8_ETC2:           return 2;
        case FORMAT_SRGB8_ETC2:          return 2;
        case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:  return 2;
        case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
        case FORMAT_RGBA8_ETC2_EAC:      return 4;
        case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
        case FORMAT_RGBA_ASTC_4x4_KHR:
        case FORMAT_RGBA_ASTC_5x4_KHR:
        case FORMAT_RGBA_ASTC_5x5_KHR:
        case FORMAT_RGBA_ASTC_6x5_KHR:
        case FORMAT_RGBA_ASTC_6x6_KHR:
        case FORMAT_RGBA_ASTC_8x5_KHR:
        case FORMAT_RGBA_ASTC_8x6_KHR:
        case FORMAT_RGBA_ASTC_8x8_KHR:
        case FORMAT_RGBA_ASTC_10x5_KHR:
        case FORMAT_RGBA_ASTC_10x6_KHR:
        case FORMAT_RGBA_ASTC_10x8_KHR:
        case FORMAT_RGBA_ASTC_10x10_KHR:
        case FORMAT_RGBA_ASTC_12x10_KHR:
        case FORMAT_RGBA_ASTC_12x12_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0;   // FIXME
        // Bumpmap formats
        case FORMAT_V8U8:                return 2;
        case FORMAT_L6V5U5:              return 2;
        case FORMAT_Q8W8V8U8:            return 4;
        case FORMAT_X8L8V8U8:            return 4;
        case FORMAT_A2W10V10U10:         return 4;
        case FORMAT_V16U16:              return 4;
        case FORMAT_A16W16V16U16:        return 8;
        case FORMAT_Q16W16V16U16:        return 8;
        // Luminance formats
        case FORMAT_L8:                  return 1;
        case FORMAT_A4L4:                return 1;
        case FORMAT_L16:                 return 2;
        case FORMAT_A8L8:                return 2;
        case FORMAT_L16F:                return 2;
        case FORMAT_A16L16F:             return 4;
        case FORMAT_L32F:                return 4;
        case FORMAT_A32L32F:             return 8;
        // Floating-point formats
        case FORMAT_A16F:                return 2;
        case FORMAT_R16F:                return 2;
        case FORMAT_G16R16F:             return 4;
        case FORMAT_B16G16R16F:          return 6;
        case FORMAT_A16B16G16R16F:       return 8;
        case FORMAT_A32F:                return 4;
        case FORMAT_R32F:                return 4;
        case FORMAT_G32R32F:             return 8;
        case FORMAT_B32G32R32F:          return 12;
        case FORMAT_X32B32G32R32F:       return 16;
        case FORMAT_A32B32G32R32F:       return 16;
        // Depth/stencil formats
        case FORMAT_D16:                 return 2;
        case FORMAT_D32:                 return 4;
        case FORMAT_D24X8:               return 4;
        case FORMAT_D24S8:               return 4;
        case FORMAT_D24FS8:              return 4;
        case FORMAT_D32F:                return 4;
        case FORMAT_D32F_COMPLEMENTARY:  return 4;
        case FORMAT_D32F_LOCKABLE:       return 4;
        case FORMAT_D32FS8_TEXTURE:      return 4;
        case FORMAT_D32FS8_SHADOW:       return 4;
        case FORMAT_DF24S8:              return 4;
        case FORMAT_DF16S8:              return 2;
        case FORMAT_INTZ:                return 4;
        case FORMAT_S8:                  return 1;
        case FORMAT_YV12_BT601:          return 1;   // Y plane only
        case FORMAT_YV12_BT709:          return 1;   // Y plane only
        case FORMAT_YV12_JFIF:           return 1;   // Y plane only
        default:
            ASSERT(false);
        }

        return 0;
    }

    // Row pitch in bytes. For block-compressed formats this is the byte
    // distance between block rows (i.e. per N texel rows, N = block height).
    int Surface::pitchB(int width, Format format, bool target)
    {
        // Render targets and depth/stencil buffers round the width up to even.
        if(target || isDepth(format) || isStencil(format))
        {
            width = align(width, 2);
        }

        switch(format)
        {
#if S3TC_SUPPORT
        case FORMAT_DXT1:
#endif
        case FORMAT_ETC1:
        case FORMAT_R11_EAC:
        case FORMAT_SIGNED_R11_EAC:
        case FORMAT_RGB8_ETC2:
        case FORMAT_SRGB8_ETC2:
        case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
        case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
            return 8 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per 4 rows
        case FORMAT_RG11_EAC:
        case FORMAT_SIGNED_RG11_EAC:
        case FORMAT_RGBA8_ETC2_EAC:
        case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
        case FORMAT_RGBA_ASTC_4x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
            return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
        case FORMAT_RGBA_ASTC_5x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
        case FORMAT_RGBA_ASTC_5x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
            return 16 * ((width + 4) / 5);   // 16-byte blocks, 5 texels wide
        case FORMAT_RGBA_ASTC_6x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
        case FORMAT_RGBA_ASTC_6x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
            return 16 * ((width + 5) / 6);   // 16-byte blocks, 6 texels wide
        case FORMAT_RGBA_ASTC_8x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
        case FORMAT_RGBA_ASTC_8x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
        case FORMAT_RGBA_ASTC_8x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
            return 16 * ((width + 7) / 8);   // 16-byte blocks, 8 texels wide
        case FORMAT_RGBA_ASTC_10x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
        case FORMAT_RGBA_ASTC_10x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
        case FORMAT_RGBA_ASTC_10x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
        case FORMAT_RGBA_ASTC_10x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
            return 16 * ((width + 9) / 10);  // 16-byte blocks, 10 texels wide
        case FORMAT_RGBA_ASTC_12x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
        case FORMAT_RGBA_ASTC_12x12_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
            return 16 * ((width + 11) / 12); // 16-byte blocks, 12 texels wide
#if S3TC_SUPPORT
        case FORMAT_DXT3:
        case FORMAT_DXT5:
            return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
#endif
        case FORMAT_ATI1:
            return 2 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per row
        case FORMAT_ATI2:
            return 4 * ((width + 3) / 4);    // 128 bit per 4x4 block, computed per row
        case FORMAT_YV12_BT601:
        case FORMAT_YV12_BT709:
        case FORMAT_YV12_JFIF:
            return align(width, 16);
        default:
            return bytes(format) * width;
        }
    }

    // Row pitch in pixels (0 for formats with no per-pixel byte size).
    int Surface::pitchP(int width, Format format, bool target)
    {
        int B = bytes(format);

        return B > 0 ? pitchB(width, format, target) / B : 0;
    }

    // Slice size in bytes: pitch times the number of block rows (or texel
    // rows for uncompressed formats).
    int Surface::sliceB(int width, int height, Format format, bool target)
    {
        // Render targets and depth/stencil buffers round the height up to even.
        if(target || isDepth(format) || isStencil(format))
        {
            height = ((height + 1) & ~1);
        }

        switch(format)
        {
#if S3TC_SUPPORT
        case FORMAT_DXT1:
        case FORMAT_DXT3:
        case FORMAT_DXT5:
#endif
        case FORMAT_ETC1:
        case FORMAT_R11_EAC:
        case FORMAT_SIGNED_R11_EAC:
        case FORMAT_RG11_EAC:
        case FORMAT_SIGNED_RG11_EAC:
        case FORMAT_RGB8_ETC2:
        case FORMAT_SRGB8_ETC2:
        case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
        case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
        case FORMAT_RGBA8_ETC2_EAC:
        case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
        case FORMAT_RGBA_ASTC_4x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
        case FORMAT_RGBA_ASTC_5x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
            return pitchB(width, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
        case FORMAT_RGBA_ASTC_5x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
        case FORMAT_RGBA_ASTC_6x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
        case FORMAT_RGBA_ASTC_8x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
        case FORMAT_RGBA_ASTC_10x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
            return pitchB(width, format, target) * ((height + 4) / 5);   // Pitch computed per 5 rows
        case FORMAT_RGBA_ASTC_6x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
        case FORMAT_RGBA_ASTC_8x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
        case FORMAT_RGBA_ASTC_10x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
            return pitchB(width, format, target) * ((height + 5) / 6);   // Pitch computed per 6 rows
        case FORMAT_RGBA_ASTC_8x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
        case FORMAT_RGBA_ASTC_10x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
            return pitchB(width, format, target) * ((height + 7) / 8);   // Pitch computed per 8 rows
        case FORMAT_RGBA_ASTC_10x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
        case FORMAT_RGBA_ASTC_12x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
            return pitchB(width, format, target) * ((height + 9) / 10);  // Pitch computed per 10 rows
        case FORMAT_RGBA_ASTC_12x12_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
            return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
        case FORMAT_ATI1:
        case FORMAT_ATI2:
        default:
            return pitchB(width, format, target) * height;   // Pitch computed per row
        }
    }

    // Slice size in pixels (0 for formats with no per-pixel byte size).
    int Surface::sliceP(int width, int height, Format format, bool target)
    {
        int B = bytes(format);

        return B > 0 ? sliceB(width, height, format, target) / B : 0;
    }

    // Copies source into destination, dispatching to a format-specific
    // decoder for packed/compressed sources and falling back to the generic
    // per-texel conversion otherwise. No-op when both share one buffer.
    void Surface::update(Buffer &destination, Buffer &source)
    {
    //  ASSERT(source.lock != LOCK_UNLOCKED);
    //  ASSERT(destination.lock != LOCK_UNLOCKED);

        if(destination.buffer != source.buffer)
        {
            ASSERT(source.dirty && !destination.dirty);

            switch(source.format)
            {
            case FORMAT_R8G8B8:   decodeR8G8B8(destination, source);   break;   // FIXME: Check destination format
            case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break;   // FIXME: Check destination format
            case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break;   // FIXME: Check destination format
            case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break;   // FIXME: Check destination format
            case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break;   // FIXME: Check destination format
            case FORMAT_P8:       decodeP8(destination, source);       break;   // FIXME: Check destination format
#if S3TC_SUPPORT
            case FORMAT_DXT1:     decodeDXT1(destination, source);     break;   // FIXME: Check destination format
            case FORMAT_DXT3:     decodeDXT3(destination, source);     break;   // FIXME: Check destination format
            case FORMAT_DXT5:     decodeDXT5(destination, source);     break;   // FIXME: Check destination format
#endif
            case FORMAT_ATI1:     decodeATI1(destination, source);     break;   // FIXME: Check destination format
            case FORMAT_ATI2:     decodeATI2(destination, source);     break;   // FIXME: Check destination format
            case FORMAT_R11_EAC:        decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format
            case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true);  break; // FIXME: Check destination format
            case FORMAT_RG11_EAC:       decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format
            case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check
// destination format
            case FORMAT_ETC1:
            case FORMAT_RGB8_ETC2:  decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format
            case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true);  break; // FIXME: Check destination format
            case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:  decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format
            case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true);  break; // FIXME: Check destination format
            case FORMAT_RGBA8_ETC2_EAC:        decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true);  break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_4x4_KHR:   decodeASTC(destination, source, 4, 4, 1, false);   break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_5x4_KHR:   decodeASTC(destination, source, 5, 4, 1, false);   break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_5x5_KHR:   decodeASTC(destination, source, 5, 5, 1, false);   break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_6x5_KHR:   decodeASTC(destination, source, 6, 5, 1, false);   break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_6x6_KHR:   decodeASTC(destination, source, 6, 6, 1, false);   break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_8x5_KHR:   decodeASTC(destination, source, 8, 5, 1, false);   break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_8x6_KHR:   decodeASTC(destination, source, 8, 6, 1, false);   break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_8x8_KHR:   decodeASTC(destination, source, 8, 8, 1, false);   break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_10x5_KHR:  decodeASTC(destination, source, 10, 5, 1, false);  break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_10x6_KHR:  decodeASTC(destination, source, 10, 6, 1, false);  break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_10x8_KHR:  decodeASTC(destination, source, 10, 8, 1, false);  break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
            case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:   decodeASTC(destination, source, 4, 4, 1, true);   break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:   decodeASTC(destination, source, 5, 4, 1, true);   break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:   decodeASTC(destination, source, 5, 5, 1, true);   break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:   decodeASTC(destination, source, 6, 5, 1, true);   break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:   decodeASTC(destination, source, 6, 6, 1, true);   break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:   decodeASTC(destination, source, 8, 5, 1, true);   break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:   decodeASTC(destination, source, 8, 6, 1, true);   break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:   decodeASTC(destination, source, 8, 8, 1, true);   break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:  decodeASTC(destination, source, 10, 5, 1, true);  break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:  decodeASTC(destination, source, 10, 6, 1, true);  break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:  decodeASTC(destination, source, 10, 8, 1, true);  break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
            case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
            default: genericUpdate(destination, source); break;
            }
        }
    }

    // Slice-by-slice copy over the overlapping extent of the two buffers:
    // a straight memcpy per row when the formats match, otherwise a per-texel
    // read/convert/write through Color<float>.
    void Surface::genericUpdate(Buffer &destination, Buffer &source)
    {
        unsigned char *sourceSlice = (unsigned char*)source.buffer;
        unsigned char *destinationSlice = (unsigned char*)destination.buffer;

        int depth = min(destination.depth, source.depth);
        int height = min(destination.height, source.height);
        int width = min(destination.width, source.width);
        int rowBytes = width * source.bytes;

        for(int z = 0; z < depth; z++)
        {
            unsigned char *sourceRow = sourceSlice;
            unsigned char *destinationRow = destinationSlice;

            for(int y = 0; y < height; y++)
            {
                if(source.format == destination.format)
                {
                    memcpy(destinationRow, sourceRow, rowBytes);
                }
                else
                {
                    unsigned char *sourceElement = sourceRow;
                    unsigned char *destinationElement = destinationRow;

                    for(int x = 0; x < width; x++)
                    {
                        Color<float> color = source.read(sourceElement);
                        destination.write(destinationElement, color);

                        sourceElement += source.bytes;
                        destinationElement += destination.bytes;
                    }
                }

                sourceRow += source.pitchB;
                destinationRow += destination.pitchB;
            }

            sourceSlice += source.sliceB;
            destinationSlice += destination.sliceB;
        }
    }

    // Expands 24-bit B,G,R byte triplets into 32-bit X8R8G8B8 texels with the
    // alpha byte forced to 0xFF.
    void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
    {
        unsigned char *sourceSlice = (unsigned char*)source.buffer;
        unsigned char *destinationSlice = (unsigned char*)destination.buffer;

        for(int z = 0; z < destination.depth && z < source.depth; z++)
        {
            unsigned char *sourceRow = sourceSlice;
            unsigned char *destinationRow = destinationSlice;

            for(int y = 0; y < destination.height && y < source.height; y++)
            {
                unsigned char *sourceElement = sourceRow;
                unsigned char *destinationElement = destinationRow;

                for(int x = 0; x < destination.width && x < source.width; x++)
                {
                    unsigned int b = sourceElement[0];
                    unsigned int g = sourceElement[1];
                    unsigned int r = sourceElement[2];

                    *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);

                    sourceElement += source.bytes;
                    destinationElement += destination.bytes;
                }

                sourceRow += source.pitchB;
                destinationRow += destination.pitchB;
            }

            sourceSlice += source.sliceB;
            destinationSlice += destination.sliceB;
        }
    }

    // Expands X1R5G5B5 texels to 32-bit, scaling each 5-bit channel to 8 bits
    // by fixed-point multiplication.
    void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
    {
        unsigned char *sourceSlice = (unsigned char*)source.buffer;
        unsigned char *destinationSlice = (unsigned char*)destination.buffer;

        for(int z = 0; z < destination.depth && z < source.depth; z++)
        {
            unsigned char *sourceRow = sourceSlice;
            unsigned char *destinationRow = destinationSlice;

            for(int y = 0; y < destination.height && y < source.height; y++)
            {
                unsigned char *sourceElement = sourceRow;
                unsigned char *destinationElement = destinationRow;

                for(int x = 0; x < destination.width && x < source.width; x++)
                {
                    unsigned int xrgb = *(unsigned short*)sourceElement;

                    unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
1968 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1969 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8); 1970 1971 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 1972 1973 sourceElement += source.bytes; 1974 destinationElement += destination.bytes; 1975 } 1976 1977 sourceRow += source.pitchB; 1978 destinationRow += destination.pitchB; 1979 } 1980 1981 sourceSlice += source.sliceB; 1982 destinationSlice += destination.sliceB; 1983 } 1984 } 1985 decodeA1R5G5B5(Buffer & destination,const Buffer & source)1986 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source) 1987 { 1988 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1989 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1990 1991 for(int z = 0; z < destination.depth && z < source.depth; z++) 1992 { 1993 unsigned char *sourceRow = sourceSlice; 1994 unsigned char *destinationRow = destinationSlice; 1995 1996 for(int y = 0; y < destination.height && y < source.height; y++) 1997 { 1998 unsigned char *sourceElement = sourceRow; 1999 unsigned char *destinationElement = destinationRow; 2000 2001 for(int x = 0; x < destination.width && x < source.width; x++) 2002 { 2003 unsigned int argb = *(unsigned short*)sourceElement; 2004 2005 unsigned int a = (argb & 0x8000) * 130560; 2006 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 2007 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 2008 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8); 2009 2010 *(unsigned int*)destinationElement = a | r | g | b; 2011 2012 sourceElement += source.bytes; 2013 destinationElement += destination.bytes; 2014 } 2015 2016 sourceRow += source.pitchB; 2017 destinationRow += destination.pitchB; 2018 } 2019 2020 sourceSlice += source.sliceB; 2021 destinationSlice += destination.sliceB; 2022 } 2023 } 2024 decodeX4R4G4B4(Buffer & destination,const Buffer & source)2025 void 
Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source) 2026 { 2027 unsigned char *sourceSlice = (unsigned char*)source.buffer; 2028 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 2029 2030 for(int z = 0; z < destination.depth && z < source.depth; z++) 2031 { 2032 unsigned char *sourceRow = sourceSlice; 2033 unsigned char *destinationRow = destinationSlice; 2034 2035 for(int y = 0; y < destination.height && y < source.height; y++) 2036 { 2037 unsigned char *sourceElement = sourceRow; 2038 unsigned char *destinationElement = destinationRow; 2039 2040 for(int x = 0; x < destination.width && x < source.width; x++) 2041 { 2042 unsigned int xrgb = *(unsigned short*)sourceElement; 2043 2044 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000; 2045 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00; 2046 unsigned int b = (xrgb & 0x000F) * 0x00000011; 2047 2048 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 2049 2050 sourceElement += source.bytes; 2051 destinationElement += destination.bytes; 2052 } 2053 2054 sourceRow += source.pitchB; 2055 destinationRow += destination.pitchB; 2056 } 2057 2058 sourceSlice += source.sliceB; 2059 destinationSlice += destination.sliceB; 2060 } 2061 } 2062 decodeA4R4G4B4(Buffer & destination,const Buffer & source)2063 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source) 2064 { 2065 unsigned char *sourceSlice = (unsigned char*)source.buffer; 2066 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 2067 2068 for(int z = 0; z < destination.depth && z < source.depth; z++) 2069 { 2070 unsigned char *sourceRow = sourceSlice; 2071 unsigned char *destinationRow = destinationSlice; 2072 2073 for(int y = 0; y < destination.height && y < source.height; y++) 2074 { 2075 unsigned char *sourceElement = sourceRow; 2076 unsigned char *destinationElement = destinationRow; 2077 2078 for(int x = 0; x < destination.width && x < source.width; x++) 2079 { 2080 
unsigned int argb = *(unsigned short*)sourceElement; 2081 2082 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000; 2083 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000; 2084 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00; 2085 unsigned int b = (argb & 0x000F) * 0x00000011; 2086 2087 *(unsigned int*)destinationElement = a | r | g | b; 2088 2089 sourceElement += source.bytes; 2090 destinationElement += destination.bytes; 2091 } 2092 2093 sourceRow += source.pitchB; 2094 destinationRow += destination.pitchB; 2095 } 2096 2097 sourceSlice += source.sliceB; 2098 destinationSlice += destination.sliceB; 2099 } 2100 } 2101 decodeP8(Buffer & destination,const Buffer & source)2102 void Surface::decodeP8(Buffer &destination, const Buffer &source) 2103 { 2104 unsigned char *sourceSlice = (unsigned char*)source.buffer; 2105 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 2106 2107 for(int z = 0; z < destination.depth && z < source.depth; z++) 2108 { 2109 unsigned char *sourceRow = sourceSlice; 2110 unsigned char *destinationRow = destinationSlice; 2111 2112 for(int y = 0; y < destination.height && y < source.height; y++) 2113 { 2114 unsigned char *sourceElement = sourceRow; 2115 unsigned char *destinationElement = destinationRow; 2116 2117 for(int x = 0; x < destination.width && x < source.width; x++) 2118 { 2119 unsigned int abgr = palette[*(unsigned char*)sourceElement]; 2120 2121 unsigned int r = (abgr & 0x000000FF) << 16; 2122 unsigned int g = (abgr & 0x0000FF00) << 0; 2123 unsigned int b = (abgr & 0x00FF0000) >> 16; 2124 unsigned int a = (abgr & 0xFF000000) >> 0; 2125 2126 *(unsigned int*)destinationElement = a | r | g | b; 2127 2128 sourceElement += source.bytes; 2129 destinationElement += destination.bytes; 2130 } 2131 2132 sourceRow += source.pitchB; 2133 destinationRow += destination.pitchB; 2134 } 2135 2136 sourceSlice += source.sliceB; 2137 destinationSlice += destination.sliceB; 2138 } 2139 } 2140 2141 #if 
S3TC_SUPPORT decodeDXT1(Buffer & internal,const Buffer & external)2142 void Surface::decodeDXT1(Buffer &internal, const Buffer &external) 2143 { 2144 unsigned int *destSlice = (unsigned int*)internal.buffer; 2145 const DXT1 *source = (const DXT1*)external.buffer; 2146 2147 for(int z = 0; z < external.depth; z++) 2148 { 2149 unsigned int *dest = destSlice; 2150 2151 for(int y = 0; y < external.height; y += 4) 2152 { 2153 for(int x = 0; x < external.width; x += 4) 2154 { 2155 Color<byte> c[4]; 2156 2157 c[0] = source->c0; 2158 c[1] = source->c1; 2159 2160 if(source->c0 > source->c1) // No transparency 2161 { 2162 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2163 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2164 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2165 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2166 c[2].a = 0xFF; 2167 2168 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2169 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2170 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2171 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2172 c[3].a = 0xFF; 2173 } 2174 else // c3 transparent 2175 { 2176 // c2 = 1 / 2 * c0 + 1 / 2 * c1 2177 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2); 2178 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2); 2179 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2); 2180 c[2].a = 0xFF; 2181 2182 c[3].r = 0; 2183 c[3].g = 0; 2184 c[3].b = 0; 2185 c[3].a = 0; 2186 } 2187 2188 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2189 { 2190 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2191 { 2192 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4]; 2193 } 2194 } 2195 2196 source++; 2197 } 2198 } 2199 2200 (byte*&)destSlice += internal.sliceB; 2201 } 2202 } 2203 decodeDXT3(Buffer & internal,const Buffer & external)2204 void Surface::decodeDXT3(Buffer &internal, const Buffer &external) 2205 { 2206 unsigned int *destSlice = 
(unsigned int*)internal.buffer; 2207 const DXT3 *source = (const DXT3*)external.buffer; 2208 2209 for(int z = 0; z < external.depth; z++) 2210 { 2211 unsigned int *dest = destSlice; 2212 2213 for(int y = 0; y < external.height; y += 4) 2214 { 2215 for(int x = 0; x < external.width; x += 4) 2216 { 2217 Color<byte> c[4]; 2218 2219 c[0] = source->c0; 2220 c[1] = source->c1; 2221 2222 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2223 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2224 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2225 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2226 2227 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2228 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2229 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2230 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2231 2232 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2233 { 2234 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2235 { 2236 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F; 2237 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24)); 2238 2239 dest[(x + i) + (y + j) * internal.width] = color; 2240 } 2241 } 2242 2243 source++; 2244 } 2245 } 2246 2247 (byte*&)destSlice += internal.sliceB; 2248 } 2249 } 2250 decodeDXT5(Buffer & internal,const Buffer & external)2251 void Surface::decodeDXT5(Buffer &internal, const Buffer &external) 2252 { 2253 unsigned int *destSlice = (unsigned int*)internal.buffer; 2254 const DXT5 *source = (const DXT5*)external.buffer; 2255 2256 for(int z = 0; z < external.depth; z++) 2257 { 2258 unsigned int *dest = destSlice; 2259 2260 for(int y = 0; y < external.height; y += 4) 2261 { 2262 for(int x = 0; x < external.width; x += 4) 2263 { 2264 Color<byte> c[4]; 2265 2266 c[0] = source->c0; 2267 c[1] = source->c1; 2268 2269 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2270 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 
3); 2271 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2272 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2273 2274 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2275 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2276 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2277 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2278 2279 byte a[8]; 2280 2281 a[0] = source->a0; 2282 a[1] = source->a1; 2283 2284 if(a[0] > a[1]) 2285 { 2286 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7); 2287 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7); 2288 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7); 2289 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7); 2290 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7); 2291 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7); 2292 } 2293 else 2294 { 2295 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5); 2296 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5); 2297 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5); 2298 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5); 2299 a[6] = 0; 2300 a[7] = 0xFF; 2301 } 2302 2303 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2304 { 2305 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2306 { 2307 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24; 2308 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha; 2309 2310 dest[(x + i) + (y + j) * internal.width] = color; 2311 } 2312 } 2313 2314 source++; 2315 } 2316 } 2317 2318 (byte*&)destSlice += internal.sliceB; 2319 } 2320 } 2321 #endif 2322 decodeATI1(Buffer & internal,const Buffer & external)2323 void Surface::decodeATI1(Buffer &internal, const Buffer &external) 2324 { 2325 byte *destSlice = (byte*)internal.buffer; 2326 const ATI1 *source = (const ATI1*)external.buffer; 2327 2328 for(int z = 0; z < external.depth; z++) 2329 { 2330 
byte *dest = destSlice; 2331 2332 for(int y = 0; y < external.height; y += 4) 2333 { 2334 for(int x = 0; x < external.width; x += 4) 2335 { 2336 byte r[8]; 2337 2338 r[0] = source->r0; 2339 r[1] = source->r1; 2340 2341 if(r[0] > r[1]) 2342 { 2343 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7); 2344 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7); 2345 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7); 2346 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7); 2347 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7); 2348 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7); 2349 } 2350 else 2351 { 2352 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5); 2353 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5); 2354 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5); 2355 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5); 2356 r[6] = 0; 2357 r[7] = 0xFF; 2358 } 2359 2360 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2361 { 2362 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2363 { 2364 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8]; 2365 } 2366 } 2367 2368 source++; 2369 } 2370 } 2371 2372 destSlice += internal.sliceB; 2373 } 2374 } 2375 decodeATI2(Buffer & internal,const Buffer & external)2376 void Surface::decodeATI2(Buffer &internal, const Buffer &external) 2377 { 2378 word *destSlice = (word*)internal.buffer; 2379 const ATI2 *source = (const ATI2*)external.buffer; 2380 2381 for(int z = 0; z < external.depth; z++) 2382 { 2383 word *dest = destSlice; 2384 2385 for(int y = 0; y < external.height; y += 4) 2386 { 2387 for(int x = 0; x < external.width; x += 4) 2388 { 2389 byte X[8]; 2390 2391 X[0] = source->x0; 2392 X[1] = source->x1; 2393 2394 if(X[0] > X[1]) 2395 { 2396 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7); 2397 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7); 2398 X[4] = (byte)((4 * (word)X[0] + 3 * 
(word)X[1] + 3) / 7); 2399 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7); 2400 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7); 2401 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7); 2402 } 2403 else 2404 { 2405 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5); 2406 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5); 2407 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5); 2408 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5); 2409 X[6] = 0; 2410 X[7] = 0xFF; 2411 } 2412 2413 byte Y[8]; 2414 2415 Y[0] = source->y0; 2416 Y[1] = source->y1; 2417 2418 if(Y[0] > Y[1]) 2419 { 2420 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7); 2421 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7); 2422 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7); 2423 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7); 2424 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7); 2425 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7); 2426 } 2427 else 2428 { 2429 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5); 2430 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5); 2431 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5); 2432 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5); 2433 Y[6] = 0; 2434 Y[7] = 0xFF; 2435 } 2436 2437 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2438 { 2439 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2440 { 2441 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8]; 2442 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8]; 2443 2444 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r; 2445 } 2446 } 2447 2448 source++; 2449 } 2450 } 2451 2452 (byte*&)destSlice += internal.sliceB; 2453 } 2454 } 2455 decodeETC2(Buffer & internal,const Buffer & external,int nbAlphaBits,bool isSRGB)2456 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB) 2457 { 2458 
ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2459 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB)); 2460 2461 if(isSRGB) 2462 { 2463 static byte sRGBtoLinearTable[256]; 2464 static bool sRGBtoLinearTableDirty = true; 2465 if(sRGBtoLinearTableDirty) 2466 { 2467 for(int i = 0; i < 256; i++) 2468 { 2469 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f); 2470 } 2471 sRGBtoLinearTableDirty = false; 2472 } 2473 2474 // Perform sRGB conversion in place after decoding 2475 byte* src = (byte*)internal.buffer; 2476 for(int y = 0; y < internal.height; y++) 2477 { 2478 byte* srcRow = src + y * internal.pitchB; 2479 for(int x = 0; x < internal.width; x++) 2480 { 2481 byte* srcPix = srcRow + x * internal.bytes; 2482 for(int i = 0; i < 3; i++) 2483 { 2484 srcPix[i] = sRGBtoLinearTable[srcPix[i]]; 2485 } 2486 } 2487 } 2488 } 2489 } 2490 decodeEAC(Buffer & internal,const Buffer & external,int nbChannels,bool isSigned)2491 void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned) 2492 { 2493 ASSERT(nbChannels == 1 || nbChannels == 2); 2494 2495 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2496 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? 
ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED)); 2497 2498 // FIXME: We convert signed data to float, until signed integer internal formats are supported 2499 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats 2500 if(isSigned) 2501 { 2502 sbyte* src = (sbyte*)internal.buffer; 2503 2504 for(int y = 0; y < internal.height; y++) 2505 { 2506 sbyte* srcRow = src + y * internal.pitchB; 2507 for(int x = internal.width - 1; x >= 0; x--) 2508 { 2509 int dx = x & 0xFFFFFFFC; 2510 int mx = x - dx; 2511 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels; 2512 float* dstPix = (float*)(srcRow + x * internal.bytes); 2513 for(int c = nbChannels - 1; c >= 0; c--) 2514 { 2515 static const float normalization = 1.0f / 127.875f; 2516 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f); 2517 } 2518 } 2519 } 2520 } 2521 } 2522 decodeASTC(Buffer & internal,const Buffer & external,int xBlockSize,int yBlockSize,int zBlockSize,bool isSRGB)2523 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB) 2524 { 2525 } 2526 size(int width,int height,int depth,Format format)2527 unsigned int Surface::size(int width, int height, int depth, Format format) 2528 { 2529 // Dimensions rounded up to multiples of 4, used for compressed formats 2530 int width4 = align(width, 4); 2531 int height4 = align(height, 4); 2532 2533 switch(format) 2534 { 2535 #if S3TC_SUPPORT 2536 case FORMAT_DXT1: 2537 #endif 2538 case FORMAT_ATI1: 2539 case FORMAT_ETC1: 2540 case FORMAT_R11_EAC: 2541 case FORMAT_SIGNED_R11_EAC: 2542 case FORMAT_RGB8_ETC2: 2543 case FORMAT_SRGB8_ETC2: 2544 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2545 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2546 return width4 * height4 * depth / 2; 2547 #if S3TC_SUPPORT 2548 case FORMAT_DXT3: 2549 case FORMAT_DXT5: 2550 #endif 2551 case FORMAT_ATI2: 2552 case FORMAT_RG11_EAC: 2553 case 
FORMAT_SIGNED_RG11_EAC: 2554 case FORMAT_RGBA8_ETC2_EAC: 2555 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2556 case FORMAT_RGBA_ASTC_4x4_KHR: 2557 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2558 return width4 * height4 * depth; 2559 case FORMAT_RGBA_ASTC_5x4_KHR: 2560 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2561 return align(width, 5) * height4 * depth; 2562 case FORMAT_RGBA_ASTC_5x5_KHR: 2563 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2564 return align(width, 5) * align(height, 5) * depth; 2565 case FORMAT_RGBA_ASTC_6x5_KHR: 2566 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2567 return align(width, 6) * align(height, 5) * depth; 2568 case FORMAT_RGBA_ASTC_6x6_KHR: 2569 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2570 return align(width, 6) * align(height, 6) * depth; 2571 case FORMAT_RGBA_ASTC_8x5_KHR: 2572 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2573 return align(width, 8) * align(height, 5) * depth; 2574 case FORMAT_RGBA_ASTC_8x6_KHR: 2575 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2576 return align(width, 8) * align(height, 6) * depth; 2577 case FORMAT_RGBA_ASTC_8x8_KHR: 2578 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2579 return align(width, 8) * align(height, 8) * depth; 2580 case FORMAT_RGBA_ASTC_10x5_KHR: 2581 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2582 return align(width, 10) * align(height, 5) * depth; 2583 case FORMAT_RGBA_ASTC_10x6_KHR: 2584 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2585 return align(width, 10) * align(height, 6) * depth; 2586 case FORMAT_RGBA_ASTC_10x8_KHR: 2587 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2588 return align(width, 10) * align(height, 8) * depth; 2589 case FORMAT_RGBA_ASTC_10x10_KHR: 2590 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2591 return align(width, 10) * align(height, 10) * depth; 2592 case FORMAT_RGBA_ASTC_12x10_KHR: 2593 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2594 return align(width, 12) * align(height, 10) * depth; 2595 case FORMAT_RGBA_ASTC_12x12_KHR: 2596 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2597 return align(width, 12) * align(height, 12) * 
depth; 2598 case FORMAT_YV12_BT601: 2599 case FORMAT_YV12_BT709: 2600 case FORMAT_YV12_JFIF: 2601 { 2602 unsigned int YStride = align(width, 16); 2603 unsigned int YSize = YStride * height; 2604 unsigned int CStride = align(YStride / 2, 16); 2605 unsigned int CSize = CStride * height / 2; 2606 2607 return YSize + 2 * CSize; 2608 } 2609 default: 2610 return bytes(format) * width * height * depth; 2611 } 2612 } 2613 isStencil(Format format)2614 bool Surface::isStencil(Format format) 2615 { 2616 switch(format) 2617 { 2618 case FORMAT_D32: 2619 case FORMAT_D16: 2620 case FORMAT_D24X8: 2621 case FORMAT_D32F: 2622 case FORMAT_D32F_COMPLEMENTARY: 2623 case FORMAT_D32F_LOCKABLE: 2624 return false; 2625 case FORMAT_D24S8: 2626 case FORMAT_D24FS8: 2627 case FORMAT_S8: 2628 case FORMAT_DF24S8: 2629 case FORMAT_DF16S8: 2630 case FORMAT_D32FS8_TEXTURE: 2631 case FORMAT_D32FS8_SHADOW: 2632 case FORMAT_INTZ: 2633 return true; 2634 default: 2635 return false; 2636 } 2637 } 2638 isDepth(Format format)2639 bool Surface::isDepth(Format format) 2640 { 2641 switch(format) 2642 { 2643 case FORMAT_D32: 2644 case FORMAT_D16: 2645 case FORMAT_D24X8: 2646 case FORMAT_D24S8: 2647 case FORMAT_D24FS8: 2648 case FORMAT_D32F: 2649 case FORMAT_D32F_COMPLEMENTARY: 2650 case FORMAT_D32F_LOCKABLE: 2651 case FORMAT_DF24S8: 2652 case FORMAT_DF16S8: 2653 case FORMAT_D32FS8_TEXTURE: 2654 case FORMAT_D32FS8_SHADOW: 2655 case FORMAT_INTZ: 2656 return true; 2657 case FORMAT_S8: 2658 return false; 2659 default: 2660 return false; 2661 } 2662 } 2663 hasQuadLayout(Format format)2664 bool Surface::hasQuadLayout(Format format) 2665 { 2666 switch(format) 2667 { 2668 case FORMAT_D32: 2669 case FORMAT_D16: 2670 case FORMAT_D24X8: 2671 case FORMAT_D24S8: 2672 case FORMAT_D24FS8: 2673 case FORMAT_D32F: 2674 case FORMAT_D32F_COMPLEMENTARY: 2675 case FORMAT_DF24S8: 2676 case FORMAT_DF16S8: 2677 case FORMAT_INTZ: 2678 case FORMAT_S8: 2679 case FORMAT_A8G8R8B8Q: 2680 case FORMAT_X8G8R8B8Q: 2681 return true; 2682 case 
FORMAT_D32F_LOCKABLE: 2683 case FORMAT_D32FS8_TEXTURE: 2684 case FORMAT_D32FS8_SHADOW: 2685 default: 2686 break; 2687 } 2688 2689 return false; 2690 } 2691 isPalette(Format format)2692 bool Surface::isPalette(Format format) 2693 { 2694 switch(format) 2695 { 2696 case FORMAT_P8: 2697 case FORMAT_A8P8: 2698 return true; 2699 default: 2700 return false; 2701 } 2702 } 2703 isFloatFormat(Format format)2704 bool Surface::isFloatFormat(Format format) 2705 { 2706 switch(format) 2707 { 2708 case FORMAT_R5G6B5: 2709 case FORMAT_R8G8B8: 2710 case FORMAT_B8G8R8: 2711 case FORMAT_X8R8G8B8: 2712 case FORMAT_X8B8G8R8I: 2713 case FORMAT_X8B8G8R8: 2714 case FORMAT_A8R8G8B8: 2715 case FORMAT_SRGB8_X8: 2716 case FORMAT_SRGB8_A8: 2717 case FORMAT_A8B8G8R8I: 2718 case FORMAT_R8UI: 2719 case FORMAT_G8R8UI: 2720 case FORMAT_X8B8G8R8UI: 2721 case FORMAT_A8B8G8R8UI: 2722 case FORMAT_A8B8G8R8: 2723 case FORMAT_G8R8I: 2724 case FORMAT_G8R8: 2725 case FORMAT_A2B10G10R10: 2726 case FORMAT_R8I_SNORM: 2727 case FORMAT_G8R8I_SNORM: 2728 case FORMAT_X8B8G8R8I_SNORM: 2729 case FORMAT_A8B8G8R8I_SNORM: 2730 case FORMAT_R16I: 2731 case FORMAT_R16UI: 2732 case FORMAT_G16R16I: 2733 case FORMAT_G16R16UI: 2734 case FORMAT_G16R16: 2735 case FORMAT_X16B16G16R16I: 2736 case FORMAT_X16B16G16R16UI: 2737 case FORMAT_A16B16G16R16I: 2738 case FORMAT_A16B16G16R16UI: 2739 case FORMAT_A16B16G16R16: 2740 case FORMAT_V8U8: 2741 case FORMAT_Q8W8V8U8: 2742 case FORMAT_X8L8V8U8: 2743 case FORMAT_V16U16: 2744 case FORMAT_A16W16V16U16: 2745 case FORMAT_Q16W16V16U16: 2746 case FORMAT_A8: 2747 case FORMAT_R8I: 2748 case FORMAT_R8: 2749 case FORMAT_S8: 2750 case FORMAT_L8: 2751 case FORMAT_L16: 2752 case FORMAT_A8L8: 2753 case FORMAT_YV12_BT601: 2754 case FORMAT_YV12_BT709: 2755 case FORMAT_YV12_JFIF: 2756 case FORMAT_R32I: 2757 case FORMAT_R32UI: 2758 case FORMAT_G32R32I: 2759 case FORMAT_G32R32UI: 2760 case FORMAT_X32B32G32R32I: 2761 case FORMAT_X32B32G32R32UI: 2762 case FORMAT_A32B32G32R32I: 2763 case 
FORMAT_A32B32G32R32UI: 2764 return false; 2765 case FORMAT_R16F: 2766 case FORMAT_G16R16F: 2767 case FORMAT_B16G16R16F: 2768 case FORMAT_A16B16G16R16F: 2769 case FORMAT_R32F: 2770 case FORMAT_G32R32F: 2771 case FORMAT_B32G32R32F: 2772 case FORMAT_X32B32G32R32F: 2773 case FORMAT_A32B32G32R32F: 2774 case FORMAT_D32F: 2775 case FORMAT_D32F_COMPLEMENTARY: 2776 case FORMAT_D32F_LOCKABLE: 2777 case FORMAT_D32FS8_TEXTURE: 2778 case FORMAT_D32FS8_SHADOW: 2779 case FORMAT_L16F: 2780 case FORMAT_A16L16F: 2781 case FORMAT_L32F: 2782 case FORMAT_A32L32F: 2783 return true; 2784 default: 2785 ASSERT(false); 2786 } 2787 2788 return false; 2789 } 2790 isUnsignedComponent(Format format,int component)2791 bool Surface::isUnsignedComponent(Format format, int component) 2792 { 2793 switch(format) 2794 { 2795 case FORMAT_NULL: 2796 case FORMAT_R5G6B5: 2797 case FORMAT_R8G8B8: 2798 case FORMAT_B8G8R8: 2799 case FORMAT_X8R8G8B8: 2800 case FORMAT_X8B8G8R8: 2801 case FORMAT_A8R8G8B8: 2802 case FORMAT_A8B8G8R8: 2803 case FORMAT_SRGB8_X8: 2804 case FORMAT_SRGB8_A8: 2805 case FORMAT_G8R8: 2806 case FORMAT_A2B10G10R10: 2807 case FORMAT_R16UI: 2808 case FORMAT_G16R16: 2809 case FORMAT_G16R16UI: 2810 case FORMAT_X16B16G16R16UI: 2811 case FORMAT_A16B16G16R16: 2812 case FORMAT_A16B16G16R16UI: 2813 case FORMAT_R32UI: 2814 case FORMAT_G32R32UI: 2815 case FORMAT_X32B32G32R32UI: 2816 case FORMAT_A32B32G32R32UI: 2817 case FORMAT_R8UI: 2818 case FORMAT_G8R8UI: 2819 case FORMAT_X8B8G8R8UI: 2820 case FORMAT_A8B8G8R8UI: 2821 case FORMAT_D32F: 2822 case FORMAT_D32F_COMPLEMENTARY: 2823 case FORMAT_D32F_LOCKABLE: 2824 case FORMAT_D32FS8_TEXTURE: 2825 case FORMAT_D32FS8_SHADOW: 2826 case FORMAT_A8: 2827 case FORMAT_R8: 2828 case FORMAT_L8: 2829 case FORMAT_L16: 2830 case FORMAT_A8L8: 2831 case FORMAT_YV12_BT601: 2832 case FORMAT_YV12_BT709: 2833 case FORMAT_YV12_JFIF: 2834 return true; 2835 case FORMAT_A8B8G8R8I: 2836 case FORMAT_A16B16G16R16I: 2837 case FORMAT_A32B32G32R32I: 2838 case FORMAT_A8B8G8R8I_SNORM: 
2839 case FORMAT_Q8W8V8U8: 2840 case FORMAT_Q16W16V16U16: 2841 case FORMAT_A32B32G32R32F: 2842 return false; 2843 case FORMAT_R32F: 2844 case FORMAT_R8I: 2845 case FORMAT_R16I: 2846 case FORMAT_R32I: 2847 case FORMAT_R8I_SNORM: 2848 return component >= 1; 2849 case FORMAT_V8U8: 2850 case FORMAT_X8L8V8U8: 2851 case FORMAT_V16U16: 2852 case FORMAT_G32R32F: 2853 case FORMAT_G8R8I: 2854 case FORMAT_G16R16I: 2855 case FORMAT_G32R32I: 2856 case FORMAT_G8R8I_SNORM: 2857 return component >= 2; 2858 case FORMAT_A16W16V16U16: 2859 case FORMAT_B32G32R32F: 2860 case FORMAT_X32B32G32R32F: 2861 case FORMAT_X8B8G8R8I: 2862 case FORMAT_X16B16G16R16I: 2863 case FORMAT_X32B32G32R32I: 2864 case FORMAT_X8B8G8R8I_SNORM: 2865 return component >= 3; 2866 default: 2867 ASSERT(false); 2868 } 2869 2870 return false; 2871 } 2872 isSRGBreadable(Format format)2873 bool Surface::isSRGBreadable(Format format) 2874 { 2875 // Keep in sync with Capabilities::isSRGBreadable 2876 switch(format) 2877 { 2878 case FORMAT_L8: 2879 case FORMAT_A8L8: 2880 case FORMAT_R8G8B8: 2881 case FORMAT_A8R8G8B8: 2882 case FORMAT_X8R8G8B8: 2883 case FORMAT_A8B8G8R8: 2884 case FORMAT_X8B8G8R8: 2885 case FORMAT_SRGB8_X8: 2886 case FORMAT_SRGB8_A8: 2887 case FORMAT_R5G6B5: 2888 case FORMAT_X1R5G5B5: 2889 case FORMAT_A1R5G5B5: 2890 case FORMAT_A4R4G4B4: 2891 #if S3TC_SUPPORT 2892 case FORMAT_DXT1: 2893 case FORMAT_DXT3: 2894 case FORMAT_DXT5: 2895 #endif 2896 case FORMAT_ATI1: 2897 case FORMAT_ATI2: 2898 return true; 2899 default: 2900 return false; 2901 } 2902 } 2903 isSRGBwritable(Format format)2904 bool Surface::isSRGBwritable(Format format) 2905 { 2906 // Keep in sync with Capabilities::isSRGBwritable 2907 switch(format) 2908 { 2909 case FORMAT_NULL: 2910 case FORMAT_A8R8G8B8: 2911 case FORMAT_X8R8G8B8: 2912 case FORMAT_A8B8G8R8: 2913 case FORMAT_X8B8G8R8: 2914 case FORMAT_SRGB8_X8: 2915 case FORMAT_SRGB8_A8: 2916 case FORMAT_R5G6B5: 2917 return true; 2918 default: 2919 return false; 2920 } 2921 } 2922 
isCompressed(Format format)2923 bool Surface::isCompressed(Format format) 2924 { 2925 switch(format) 2926 { 2927 #if S3TC_SUPPORT 2928 case FORMAT_DXT1: 2929 case FORMAT_DXT3: 2930 case FORMAT_DXT5: 2931 #endif 2932 case FORMAT_ATI1: 2933 case FORMAT_ATI2: 2934 case FORMAT_ETC1: 2935 case FORMAT_R11_EAC: 2936 case FORMAT_SIGNED_R11_EAC: 2937 case FORMAT_RG11_EAC: 2938 case FORMAT_SIGNED_RG11_EAC: 2939 case FORMAT_RGB8_ETC2: 2940 case FORMAT_SRGB8_ETC2: 2941 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2942 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2943 case FORMAT_RGBA8_ETC2_EAC: 2944 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2945 case FORMAT_RGBA_ASTC_4x4_KHR: 2946 case FORMAT_RGBA_ASTC_5x4_KHR: 2947 case FORMAT_RGBA_ASTC_5x5_KHR: 2948 case FORMAT_RGBA_ASTC_6x5_KHR: 2949 case FORMAT_RGBA_ASTC_6x6_KHR: 2950 case FORMAT_RGBA_ASTC_8x5_KHR: 2951 case FORMAT_RGBA_ASTC_8x6_KHR: 2952 case FORMAT_RGBA_ASTC_8x8_KHR: 2953 case FORMAT_RGBA_ASTC_10x5_KHR: 2954 case FORMAT_RGBA_ASTC_10x6_KHR: 2955 case FORMAT_RGBA_ASTC_10x8_KHR: 2956 case FORMAT_RGBA_ASTC_10x10_KHR: 2957 case FORMAT_RGBA_ASTC_12x10_KHR: 2958 case FORMAT_RGBA_ASTC_12x12_KHR: 2959 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2960 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2961 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2962 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2963 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2964 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2965 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2966 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2967 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2968 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2969 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2970 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2971 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2972 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2973 return true; 2974 default: 2975 return false; 2976 } 2977 } 2978 isSignedNonNormalizedInteger(Format format)2979 bool Surface::isSignedNonNormalizedInteger(Format format) 2980 { 2981 switch(format) 2982 { 2983 case 
FORMAT_A8B8G8R8I: 2984 case FORMAT_X8B8G8R8I: 2985 case FORMAT_G8R8I: 2986 case FORMAT_R8I: 2987 case FORMAT_A16B16G16R16I: 2988 case FORMAT_X16B16G16R16I: 2989 case FORMAT_G16R16I: 2990 case FORMAT_R16I: 2991 case FORMAT_A32B32G32R32I: 2992 case FORMAT_X32B32G32R32I: 2993 case FORMAT_G32R32I: 2994 case FORMAT_R32I: 2995 return true; 2996 default: 2997 return false; 2998 } 2999 } 3000 isUnsignedNonNormalizedInteger(Format format)3001 bool Surface::isUnsignedNonNormalizedInteger(Format format) 3002 { 3003 switch(format) 3004 { 3005 case FORMAT_A8B8G8R8UI: 3006 case FORMAT_X8B8G8R8UI: 3007 case FORMAT_G8R8UI: 3008 case FORMAT_R8UI: 3009 case FORMAT_A16B16G16R16UI: 3010 case FORMAT_X16B16G16R16UI: 3011 case FORMAT_G16R16UI: 3012 case FORMAT_R16UI: 3013 case FORMAT_A32B32G32R32UI: 3014 case FORMAT_X32B32G32R32UI: 3015 case FORMAT_G32R32UI: 3016 case FORMAT_R32UI: 3017 return true; 3018 default: 3019 return false; 3020 } 3021 } 3022 isNonNormalizedInteger(Format format)3023 bool Surface::isNonNormalizedInteger(Format format) 3024 { 3025 return isSignedNonNormalizedInteger(format) || 3026 isUnsignedNonNormalizedInteger(format); 3027 } 3028 isNormalizedInteger(Format format)3029 bool Surface::isNormalizedInteger(Format format) 3030 { 3031 return !isFloatFormat(format) && 3032 !isNonNormalizedInteger(format) && 3033 !isCompressed(format) && 3034 !isDepth(format) && 3035 !isStencil(format); 3036 } 3037 componentCount(Format format)3038 int Surface::componentCount(Format format) 3039 { 3040 switch(format) 3041 { 3042 case FORMAT_R5G6B5: return 3; 3043 case FORMAT_X8R8G8B8: return 3; 3044 case FORMAT_X8B8G8R8I: return 3; 3045 case FORMAT_X8B8G8R8: return 3; 3046 case FORMAT_A8R8G8B8: return 4; 3047 case FORMAT_SRGB8_X8: return 3; 3048 case FORMAT_SRGB8_A8: return 4; 3049 case FORMAT_A8B8G8R8I: return 4; 3050 case FORMAT_A8B8G8R8: return 4; 3051 case FORMAT_G8R8I: return 2; 3052 case FORMAT_G8R8: return 2; 3053 case FORMAT_R8I_SNORM: return 1; 3054 case FORMAT_G8R8I_SNORM: 
return 2; 3055 case FORMAT_X8B8G8R8I_SNORM:return 3; 3056 case FORMAT_A8B8G8R8I_SNORM:return 4; 3057 case FORMAT_R8UI: return 1; 3058 case FORMAT_G8R8UI: return 2; 3059 case FORMAT_X8B8G8R8UI: return 3; 3060 case FORMAT_A8B8G8R8UI: return 4; 3061 case FORMAT_A2B10G10R10: return 4; 3062 case FORMAT_G16R16I: return 2; 3063 case FORMAT_G16R16UI: return 2; 3064 case FORMAT_G16R16: return 2; 3065 case FORMAT_G32R32I: return 2; 3066 case FORMAT_G32R32UI: return 2; 3067 case FORMAT_X16B16G16R16I: return 3; 3068 case FORMAT_X16B16G16R16UI: return 3; 3069 case FORMAT_A16B16G16R16I: return 4; 3070 case FORMAT_A16B16G16R16UI: return 4; 3071 case FORMAT_A16B16G16R16: return 4; 3072 case FORMAT_X32B32G32R32I: return 3; 3073 case FORMAT_X32B32G32R32UI: return 3; 3074 case FORMAT_A32B32G32R32I: return 4; 3075 case FORMAT_A32B32G32R32UI: return 4; 3076 case FORMAT_V8U8: return 2; 3077 case FORMAT_Q8W8V8U8: return 4; 3078 case FORMAT_X8L8V8U8: return 3; 3079 case FORMAT_V16U16: return 2; 3080 case FORMAT_A16W16V16U16: return 4; 3081 case FORMAT_Q16W16V16U16: return 4; 3082 case FORMAT_R32F: return 1; 3083 case FORMAT_G32R32F: return 2; 3084 case FORMAT_X32B32G32R32F: return 3; 3085 case FORMAT_A32B32G32R32F: return 4; 3086 case FORMAT_D32F: return 1; 3087 case FORMAT_D32F_LOCKABLE: return 1; 3088 case FORMAT_D32FS8_TEXTURE: return 1; 3089 case FORMAT_D32FS8_SHADOW: return 1; 3090 case FORMAT_A8: return 1; 3091 case FORMAT_R8I: return 1; 3092 case FORMAT_R8: return 1; 3093 case FORMAT_R16I: return 1; 3094 case FORMAT_R16UI: return 1; 3095 case FORMAT_R32I: return 1; 3096 case FORMAT_R32UI: return 1; 3097 case FORMAT_L8: return 1; 3098 case FORMAT_L16: return 1; 3099 case FORMAT_A8L8: return 2; 3100 case FORMAT_YV12_BT601: return 3; 3101 case FORMAT_YV12_BT709: return 3; 3102 case FORMAT_YV12_JFIF: return 3; 3103 default: 3104 ASSERT(false); 3105 } 3106 3107 return 1; 3108 } 3109 allocateBuffer(int width,int height,int depth,Format format)3110 void *Surface::allocateBuffer(int width, 
int height, int depth, Format format) 3111 { 3112 // Render targets require 2x2 quads 3113 int width2 = (width + 1) & ~1; 3114 int height2 = (height + 1) & ~1; 3115 3116 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes, 3117 // and stencil operations also read 8 bytes per four 8-bit stencil values, 3118 // so we have to allocate 4 extra bytes to avoid buffer overruns. 3119 return allocate(size(width2, height2, depth, format) + 4); 3120 } 3121 memfill4(void * buffer,int pattern,int bytes)3122 void Surface::memfill4(void *buffer, int pattern, int bytes) 3123 { 3124 while((size_t)buffer & 0x1 && bytes >= 1) 3125 { 3126 *(char*)buffer = (char)pattern; 3127 (char*&)buffer += 1; 3128 bytes -= 1; 3129 } 3130 3131 while((size_t)buffer & 0x3 && bytes >= 2) 3132 { 3133 *(short*)buffer = (short)pattern; 3134 (short*&)buffer += 1; 3135 bytes -= 2; 3136 } 3137 3138 #if defined(__i386__) || defined(__x86_64__) 3139 if(CPUID::supportsSSE()) 3140 { 3141 while((size_t)buffer & 0xF && bytes >= 4) 3142 { 3143 *(int*)buffer = pattern; 3144 (int*&)buffer += 1; 3145 bytes -= 4; 3146 } 3147 3148 __m128 quad = _mm_set_ps1((float&)pattern); 3149 3150 float *pointer = (float*)buffer; 3151 int qxwords = bytes / 64; 3152 bytes -= qxwords * 64; 3153 3154 while(qxwords--) 3155 { 3156 _mm_stream_ps(pointer + 0, quad); 3157 _mm_stream_ps(pointer + 4, quad); 3158 _mm_stream_ps(pointer + 8, quad); 3159 _mm_stream_ps(pointer + 12, quad); 3160 3161 pointer += 16; 3162 } 3163 3164 buffer = pointer; 3165 } 3166 #endif 3167 3168 while(bytes >= 4) 3169 { 3170 *(int*)buffer = (int)pattern; 3171 (int*&)buffer += 1; 3172 bytes -= 4; 3173 } 3174 3175 while(bytes >= 2) 3176 { 3177 *(short*)buffer = (short)pattern; 3178 (short*&)buffer += 1; 3179 bytes -= 2; 3180 } 3181 3182 while(bytes >= 1) 3183 { 3184 *(char*)buffer = (char)pattern; 3185 (char*&)buffer += 1; 3186 bytes -= 1; 3187 } 3188 } 3189 sync()3190 void Surface::sync() 3191 { 3192 resource->lock(EXCLUSIVE); 3193 
resource->unlock(); 3194 } 3195 isEntire(const Rect & rect) const3196 bool Surface::isEntire(const Rect& rect) const 3197 { 3198 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1); 3199 } 3200 getRect() const3201 Rect Surface::getRect() const 3202 { 3203 return Rect(0, 0, internal.width, internal.height); 3204 } 3205 clearDepth(float depth,int x0,int y0,int width,int height)3206 void Surface::clearDepth(float depth, int x0, int y0, int width, int height) 3207 { 3208 if(width == 0 || height == 0) return; 3209 3210 // Not overlapping 3211 if(x0 > internal.width) return; 3212 if(y0 > internal.height) return; 3213 if(x0 + width < 0) return; 3214 if(y0 + height < 0) return; 3215 3216 // Clip against dimensions 3217 if(x0 < 0) {width += x0; x0 = 0;} 3218 if(x0 + width > internal.width) width = internal.width - x0; 3219 if(y0 < 0) {height += y0; y0 = 0;} 3220 if(y0 + height > internal.height) height = internal.height - y0; 3221 3222 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 3223 const Lock lock = entire ? 
LOCK_DISCARD : LOCK_WRITEONLY; 3224 3225 int width2 = (internal.width + 1) & ~1; 3226 3227 int x1 = x0 + width; 3228 int y1 = y0 + height; 3229 3230 if(internal.format == FORMAT_D32F_LOCKABLE || 3231 internal.format == FORMAT_D32FS8_TEXTURE || 3232 internal.format == FORMAT_D32FS8_SHADOW) 3233 { 3234 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0; 3235 3236 for(int z = 0; z < internal.depth; z++) 3237 { 3238 for(int y = y0; y < y1; y++) 3239 { 3240 memfill4(target, (int&)depth, 4 * width); 3241 target += width2; 3242 } 3243 } 3244 3245 unlockInternal(); 3246 } 3247 else // Quad layout 3248 { 3249 if(complementaryDepthBuffer) 3250 { 3251 depth = 1 - depth; 3252 } 3253 3254 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC); 3255 3256 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3257 int oddX1 = (x1 & ~1) * 2; 3258 int evenX0 = ((x0 + 1) & ~1) * 2; 3259 int evenBytes = (oddX1 - evenX0) * sizeof(float); 3260 3261 for(int z = 0; z < internal.depth; z++) 3262 { 3263 for(int y = y0; y < y1; y++) 3264 { 3265 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3266 3267 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 3268 { 3269 if((x0 & 1) != 0) 3270 { 3271 target[oddX0 + 0] = depth; 3272 target[oddX0 + 2] = depth; 3273 } 3274 3275 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4) 3276 // { 3277 // target[x2 + 0] = depth; 3278 // target[x2 + 1] = depth; 3279 // target[x2 + 2] = depth; 3280 // target[x2 + 3] = depth; 3281 // } 3282 3283 // __asm 3284 // { 3285 // movss xmm0, depth 3286 // shufps xmm0, xmm0, 0x00 3287 // 3288 // mov eax, x0 3289 // add eax, 1 3290 // and eax, 0xFFFFFFFE 3291 // cmp eax, x1 3292 // jge qEnd 3293 // 3294 // mov edi, target 3295 // 3296 // qLoop: 3297 // movntps [edi+8*eax], xmm0 3298 // 3299 // add eax, 2 3300 // cmp eax, x1 3301 // jl qLoop 3302 // qEnd: 3303 // } 3304 3305 memfill4(&target[evenX0], (int&)depth, evenBytes); 3306 3307 if((x1 & 1) != 0) 3308 { 3309 target[oddX1 + 0] = depth; 3310 
target[oddX1 + 2] = depth; 3311 } 3312 3313 y++; 3314 } 3315 else 3316 { 3317 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3318 { 3319 target[i] = depth; 3320 } 3321 } 3322 } 3323 3324 buffer += internal.sliceP; 3325 } 3326 3327 unlockInternal(); 3328 } 3329 } 3330 clearStencil(unsigned char s,unsigned char mask,int x0,int y0,int width,int height)3331 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3332 { 3333 if(mask == 0 || width == 0 || height == 0) return; 3334 3335 // Not overlapping 3336 if(x0 > internal.width) return; 3337 if(y0 > internal.height) return; 3338 if(x0 + width < 0) return; 3339 if(y0 + height < 0) return; 3340 3341 // Clip against dimensions 3342 if(x0 < 0) {width += x0; x0 = 0;} 3343 if(x0 + width > internal.width) width = internal.width - x0; 3344 if(y0 < 0) {height += y0; y0 = 0;} 3345 if(y0 + height > internal.height) height = internal.height - y0; 3346 3347 int width2 = (internal.width + 1) & ~1; 3348 3349 int x1 = x0 + width; 3350 int y1 = y0 + height; 3351 3352 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3353 int oddX1 = (x1 & ~1) * 2; 3354 int evenX0 = ((x0 + 1) & ~1) * 2; 3355 int evenBytes = oddX1 - evenX0; 3356 3357 unsigned char maskedS = s & mask; 3358 unsigned char invMask = ~mask; 3359 unsigned int fill = maskedS; 3360 fill = fill | (fill << 8) | (fill << 16) | (fill << 24); 3361 3362 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC); 3363 3364 // Stencil buffers are assumed to use quad layout 3365 for(int z = 0; z < stencil.depth; z++) 3366 { 3367 for(int y = y0; y < y1; y++) 3368 { 3369 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3370 3371 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once 3372 { 3373 if((x0 & 1) != 0) 3374 { 3375 target[oddX0 + 0] = fill; 3376 target[oddX0 + 2] = fill; 3377 } 3378 3379 memfill4(&target[evenX0], fill, evenBytes); 3380 3381 if((x1 & 1) != 0) 3382 { 3383 target[oddX1 + 0] = fill; 3384 
target[oddX1 + 2] = fill; 3385 } 3386 3387 y++; 3388 } 3389 else 3390 { 3391 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3392 { 3393 target[i] = maskedS | (target[i] & invMask); 3394 } 3395 } 3396 } 3397 3398 buffer += stencil.sliceP; 3399 } 3400 3401 unlockStencil(); 3402 } 3403 fill(const Color<float> & color,int x0,int y0,int width,int height)3404 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height) 3405 { 3406 unsigned char *row; 3407 Buffer *buffer; 3408 3409 if(internal.dirty) 3410 { 3411 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3412 buffer = &internal; 3413 } 3414 else 3415 { 3416 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3417 buffer = &external; 3418 } 3419 3420 if(buffer->bytes <= 4) 3421 { 3422 int c; 3423 buffer->write(&c, color); 3424 3425 if(buffer->bytes <= 1) c = (c << 8) | c; 3426 if(buffer->bytes <= 2) c = (c << 16) | c; 3427 3428 for(int y = 0; y < height; y++) 3429 { 3430 memfill4(row, c, width * buffer->bytes); 3431 3432 row += buffer->pitchB; 3433 } 3434 } 3435 else // Generic 3436 { 3437 for(int y = 0; y < height; y++) 3438 { 3439 unsigned char *element = row; 3440 3441 for(int x = 0; x < width; x++) 3442 { 3443 buffer->write(element, color); 3444 3445 element += buffer->bytes; 3446 } 3447 3448 row += buffer->pitchB; 3449 } 3450 } 3451 3452 if(buffer == &internal) 3453 { 3454 unlockInternal(); 3455 } 3456 else 3457 { 3458 unlockExternal(); 3459 } 3460 } 3461 copyInternal(const Surface * source,int x,int y,float srcX,float srcY,bool filter)3462 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter) 3463 { 3464 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3465 3466 sw::Color<float> color; 3467 3468 if(!filter) 3469 { 3470 color = source->internal.read((int)srcX, (int)srcY); 3471 } 3472 else // Bilinear filtering 3473 { 3474 color = 
source->internal.sample(srcX, srcY); 3475 } 3476 3477 internal.write(x, y, color); 3478 } 3479 copyInternal(const Surface * source,int x,int y,int z,float srcX,float srcY,float srcZ,bool filter)3480 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter) 3481 { 3482 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3483 3484 sw::Color<float> color; 3485 3486 if(!filter) 3487 { 3488 color = source->internal.read((int)srcX, (int)srcY, int(srcZ)); 3489 } 3490 else // Bilinear filtering 3491 { 3492 color = source->internal.sample(srcX, srcY, srcZ); 3493 } 3494 3495 internal.write(x, y, z, color); 3496 } 3497 hasStencil() const3498 bool Surface::hasStencil() const 3499 { 3500 return isStencil(external.format); 3501 } 3502 hasDepth() const3503 bool Surface::hasDepth() const 3504 { 3505 return isDepth(external.format); 3506 } 3507 hasPalette() const3508 bool Surface::hasPalette() const 3509 { 3510 return isPalette(external.format); 3511 } 3512 isRenderTarget() const3513 bool Surface::isRenderTarget() const 3514 { 3515 return renderTarget; 3516 } 3517 hasDirtyMipmaps() const3518 bool Surface::hasDirtyMipmaps() const 3519 { 3520 return dirtyMipmaps; 3521 } 3522 cleanMipmaps()3523 void Surface::cleanMipmaps() 3524 { 3525 dirtyMipmaps = false; 3526 } 3527 getResource()3528 Resource *Surface::getResource() 3529 { 3530 return resource; 3531 } 3532 identicalFormats() const3533 bool Surface::identicalFormats() const 3534 { 3535 return external.format == internal.format && 3536 external.width == internal.width && 3537 external.height == internal.height && 3538 external.depth == internal.depth && 3539 external.pitchB == internal.pitchB && 3540 external.sliceB == internal.sliceB; 3541 } 3542 selectInternalFormat(Format format) const3543 Format Surface::selectInternalFormat(Format format) const 3544 { 3545 switch(format) 3546 { 3547 case FORMAT_NULL: 3548 return FORMAT_NULL; 3549 
case FORMAT_P8: 3550 case FORMAT_A8P8: 3551 case FORMAT_A4R4G4B4: 3552 case FORMAT_A1R5G5B5: 3553 case FORMAT_A8R3G3B2: 3554 return FORMAT_A8R8G8B8; 3555 case FORMAT_A8: 3556 return FORMAT_A8; 3557 case FORMAT_R8I: 3558 return FORMAT_R8I; 3559 case FORMAT_R8UI: 3560 return FORMAT_R8UI; 3561 case FORMAT_R8I_SNORM: 3562 return FORMAT_R8I_SNORM; 3563 case FORMAT_R8: 3564 return FORMAT_R8; 3565 case FORMAT_R16I: 3566 return FORMAT_R16I; 3567 case FORMAT_R16UI: 3568 return FORMAT_R16UI; 3569 case FORMAT_R32I: 3570 return FORMAT_R32I; 3571 case FORMAT_R32UI: 3572 return FORMAT_R32UI; 3573 case FORMAT_X16B16G16R16I: 3574 case FORMAT_A16B16G16R16I: 3575 return FORMAT_A16B16G16R16I; 3576 case FORMAT_X16B16G16R16UI: 3577 case FORMAT_A16B16G16R16UI: 3578 return FORMAT_A16B16G16R16UI; 3579 case FORMAT_A2R10G10B10: 3580 case FORMAT_A2B10G10R10: 3581 case FORMAT_A16B16G16R16: 3582 return FORMAT_A16B16G16R16; 3583 case FORMAT_X32B32G32R32I: 3584 case FORMAT_A32B32G32R32I: 3585 return FORMAT_A32B32G32R32I; 3586 case FORMAT_X32B32G32R32UI: 3587 case FORMAT_A32B32G32R32UI: 3588 return FORMAT_A32B32G32R32UI; 3589 case FORMAT_G8R8I: 3590 return FORMAT_G8R8I; 3591 case FORMAT_G8R8UI: 3592 return FORMAT_G8R8UI; 3593 case FORMAT_G8R8I_SNORM: 3594 return FORMAT_G8R8I_SNORM; 3595 case FORMAT_G8R8: 3596 return FORMAT_G8R8; 3597 case FORMAT_G16R16I: 3598 return FORMAT_G16R16I; 3599 case FORMAT_G16R16UI: 3600 return FORMAT_G16R16UI; 3601 case FORMAT_G16R16: 3602 return FORMAT_G16R16; 3603 case FORMAT_G32R32I: 3604 return FORMAT_G32R32I; 3605 case FORMAT_G32R32UI: 3606 return FORMAT_G32R32UI; 3607 case FORMAT_A8R8G8B8: 3608 if(lockable || !quadLayoutEnabled) 3609 { 3610 return FORMAT_A8R8G8B8; 3611 } 3612 else 3613 { 3614 return FORMAT_A8G8R8B8Q; 3615 } 3616 case FORMAT_A8B8G8R8I: 3617 return FORMAT_A8B8G8R8I; 3618 case FORMAT_A8B8G8R8UI: 3619 return FORMAT_A8B8G8R8UI; 3620 case FORMAT_A8B8G8R8I_SNORM: 3621 return FORMAT_A8B8G8R8I_SNORM; 3622 case FORMAT_R5G5B5A1: 3623 case FORMAT_R4G4B4A4: 
3624 case FORMAT_A8B8G8R8: 3625 return FORMAT_A8B8G8R8; 3626 case FORMAT_R5G6B5: 3627 return FORMAT_R5G6B5; 3628 case FORMAT_R3G3B2: 3629 case FORMAT_R8G8B8: 3630 case FORMAT_X4R4G4B4: 3631 case FORMAT_X1R5G5B5: 3632 case FORMAT_X8R8G8B8: 3633 if(lockable || !quadLayoutEnabled) 3634 { 3635 return FORMAT_X8R8G8B8; 3636 } 3637 else 3638 { 3639 return FORMAT_X8G8R8B8Q; 3640 } 3641 case FORMAT_X8B8G8R8I: 3642 return FORMAT_X8B8G8R8I; 3643 case FORMAT_X8B8G8R8UI: 3644 return FORMAT_X8B8G8R8UI; 3645 case FORMAT_X8B8G8R8I_SNORM: 3646 return FORMAT_X8B8G8R8I_SNORM; 3647 case FORMAT_B8G8R8: 3648 case FORMAT_X8B8G8R8: 3649 return FORMAT_X8B8G8R8; 3650 case FORMAT_SRGB8_X8: 3651 return FORMAT_SRGB8_X8; 3652 case FORMAT_SRGB8_A8: 3653 return FORMAT_SRGB8_A8; 3654 // Compressed formats 3655 #if S3TC_SUPPORT 3656 case FORMAT_DXT1: 3657 case FORMAT_DXT3: 3658 case FORMAT_DXT5: 3659 #endif 3660 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3661 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3662 case FORMAT_RGBA8_ETC2_EAC: 3663 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3664 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3665 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3666 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3667 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3668 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3669 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3670 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3671 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3672 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3673 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3674 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3675 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3676 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3677 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3678 return FORMAT_A8R8G8B8; 3679 case FORMAT_RGBA_ASTC_4x4_KHR: 3680 case FORMAT_RGBA_ASTC_5x4_KHR: 3681 case FORMAT_RGBA_ASTC_5x5_KHR: 3682 case FORMAT_RGBA_ASTC_6x5_KHR: 3683 case FORMAT_RGBA_ASTC_6x6_KHR: 3684 case FORMAT_RGBA_ASTC_8x5_KHR: 3685 case FORMAT_RGBA_ASTC_8x6_KHR: 3686 case FORMAT_RGBA_ASTC_8x8_KHR: 
3687 case FORMAT_RGBA_ASTC_10x5_KHR: 3688 case FORMAT_RGBA_ASTC_10x6_KHR: 3689 case FORMAT_RGBA_ASTC_10x8_KHR: 3690 case FORMAT_RGBA_ASTC_10x10_KHR: 3691 case FORMAT_RGBA_ASTC_12x10_KHR: 3692 case FORMAT_RGBA_ASTC_12x12_KHR: 3693 // ASTC supports HDR, so a floating point format is required to represent it properly 3694 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported 3695 case FORMAT_ATI1: 3696 case FORMAT_R11_EAC: 3697 return FORMAT_R8; 3698 case FORMAT_SIGNED_R11_EAC: 3699 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient 3700 case FORMAT_ATI2: 3701 case FORMAT_RG11_EAC: 3702 return FORMAT_G8R8; 3703 case FORMAT_SIGNED_RG11_EAC: 3704 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient 3705 case FORMAT_ETC1: 3706 case FORMAT_RGB8_ETC2: 3707 case FORMAT_SRGB8_ETC2: 3708 return FORMAT_X8R8G8B8; 3709 // Bumpmap formats 3710 case FORMAT_V8U8: return FORMAT_V8U8; 3711 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8; 3712 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8; 3713 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8; 3714 case FORMAT_V16U16: return FORMAT_V16U16; 3715 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16; 3716 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16; 3717 // Floating-point formats 3718 case FORMAT_A16F: return FORMAT_A32B32G32R32F; 3719 case FORMAT_R16F: return FORMAT_R32F; 3720 case FORMAT_G16R16F: return FORMAT_G32R32F; 3721 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F; 3722 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; 3723 case FORMAT_A32F: return FORMAT_A32B32G32R32F; 3724 case FORMAT_R32F: return FORMAT_R32F; 3725 case FORMAT_G32R32F: return FORMAT_G32R32F; 3726 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F; 3727 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F; 3728 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F; 3729 // Luminance formats 3730 case FORMAT_L8: return FORMAT_L8; 3731 case FORMAT_A4L4: return 
FORMAT_A8L8; 3732 case FORMAT_L16: return FORMAT_L16; 3733 case FORMAT_A8L8: return FORMAT_A8L8; 3734 case FORMAT_L16F: return FORMAT_X32B32G32R32F; 3735 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F; 3736 case FORMAT_L32F: return FORMAT_X32B32G32R32F; 3737 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F; 3738 // Depth/stencil formats 3739 case FORMAT_D16: 3740 case FORMAT_D32: 3741 case FORMAT_D24X8: 3742 case FORMAT_D24S8: 3743 case FORMAT_D24FS8: 3744 if(hasParent) // Texture 3745 { 3746 return FORMAT_D32FS8_SHADOW; 3747 } 3748 else if(complementaryDepthBuffer) 3749 { 3750 return FORMAT_D32F_COMPLEMENTARY; 3751 } 3752 else 3753 { 3754 return FORMAT_D32F; 3755 } 3756 case FORMAT_D32F: return FORMAT_D32F; 3757 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE; 3758 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE; 3759 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; 3760 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; 3761 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; 3762 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; 3763 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; 3764 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; 3765 default: 3766 ASSERT(false); 3767 } 3768 3769 return FORMAT_NULL; 3770 } 3771 setTexturePalette(unsigned int * palette)3772 void Surface::setTexturePalette(unsigned int *palette) 3773 { 3774 Surface::palette = palette; 3775 Surface::paletteID++; 3776 } 3777 resolve()3778 void Surface::resolve() 3779 { 3780 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL) 3781 { 3782 return; 3783 } 3784 3785 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE); 3786 3787 int width = internal.width; 3788 int height = internal.height; 3789 int pitch = internal.pitchB; 3790 int slice = internal.sliceB; 3791 3792 unsigned char *source0 = (unsigned char*)source; 3793 unsigned char *source1 = source0 + slice; 3794 unsigned char *source2 = source1 + slice; 3795 unsigned char *source3 = 
source2 + slice; 3796 unsigned char *source4 = source3 + slice; 3797 unsigned char *source5 = source4 + slice; 3798 unsigned char *source6 = source5 + slice; 3799 unsigned char *source7 = source6 + slice; 3800 unsigned char *source8 = source7 + slice; 3801 unsigned char *source9 = source8 + slice; 3802 unsigned char *sourceA = source9 + slice; 3803 unsigned char *sourceB = sourceA + slice; 3804 unsigned char *sourceC = sourceB + slice; 3805 unsigned char *sourceD = sourceC + slice; 3806 unsigned char *sourceE = sourceD + slice; 3807 unsigned char *sourceF = sourceE + slice; 3808 3809 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || 3810 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 || 3811 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8) 3812 { 3813 #if defined(__i386__) || defined(__x86_64__) 3814 if(CPUID::supportsSSE2() && (width % 4) == 0) 3815 { 3816 if(internal.depth == 2) 3817 { 3818 for(int y = 0; y < height; y++) 3819 { 3820 for(int x = 0; x < width; x += 4) 3821 { 3822 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3823 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3824 3825 c0 = _mm_avg_epu8(c0, c1); 3826 3827 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3828 } 3829 3830 source0 += pitch; 3831 source1 += pitch; 3832 } 3833 } 3834 else if(internal.depth == 4) 3835 { 3836 for(int y = 0; y < height; y++) 3837 { 3838 for(int x = 0; x < width; x += 4) 3839 { 3840 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3841 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3842 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3843 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3844 3845 c0 = _mm_avg_epu8(c0, c1); 3846 c2 = _mm_avg_epu8(c2, c3); 3847 c0 = _mm_avg_epu8(c0, c2); 3848 3849 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3850 } 3851 3852 source0 += pitch; 3853 source1 += pitch; 3854 source2 += pitch; 3855 
source3 += pitch; 3856 } 3857 } 3858 else if(internal.depth == 8) 3859 { 3860 for(int y = 0; y < height; y++) 3861 { 3862 for(int x = 0; x < width; x += 4) 3863 { 3864 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3865 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3866 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3867 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3868 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3869 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3870 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3871 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3872 3873 c0 = _mm_avg_epu8(c0, c1); 3874 c2 = _mm_avg_epu8(c2, c3); 3875 c4 = _mm_avg_epu8(c4, c5); 3876 c6 = _mm_avg_epu8(c6, c7); 3877 c0 = _mm_avg_epu8(c0, c2); 3878 c4 = _mm_avg_epu8(c4, c6); 3879 c0 = _mm_avg_epu8(c0, c4); 3880 3881 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3882 } 3883 3884 source0 += pitch; 3885 source1 += pitch; 3886 source2 += pitch; 3887 source3 += pitch; 3888 source4 += pitch; 3889 source5 += pitch; 3890 source6 += pitch; 3891 source7 += pitch; 3892 } 3893 } 3894 else if(internal.depth == 16) 3895 { 3896 for(int y = 0; y < height; y++) 3897 { 3898 for(int x = 0; x < width; x += 4) 3899 { 3900 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3901 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3902 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3903 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3904 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3905 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3906 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3907 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3908 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 3909 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 3910 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 3911 
__m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 3912 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 3913 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 3914 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 3915 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 3916 3917 c0 = _mm_avg_epu8(c0, c1); 3918 c2 = _mm_avg_epu8(c2, c3); 3919 c4 = _mm_avg_epu8(c4, c5); 3920 c6 = _mm_avg_epu8(c6, c7); 3921 c8 = _mm_avg_epu8(c8, c9); 3922 cA = _mm_avg_epu8(cA, cB); 3923 cC = _mm_avg_epu8(cC, cD); 3924 cE = _mm_avg_epu8(cE, cF); 3925 c0 = _mm_avg_epu8(c0, c2); 3926 c4 = _mm_avg_epu8(c4, c6); 3927 c8 = _mm_avg_epu8(c8, cA); 3928 cC = _mm_avg_epu8(cC, cE); 3929 c0 = _mm_avg_epu8(c0, c4); 3930 c8 = _mm_avg_epu8(c8, cC); 3931 c0 = _mm_avg_epu8(c0, c8); 3932 3933 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3934 } 3935 3936 source0 += pitch; 3937 source1 += pitch; 3938 source2 += pitch; 3939 source3 += pitch; 3940 source4 += pitch; 3941 source5 += pitch; 3942 source6 += pitch; 3943 source7 += pitch; 3944 source8 += pitch; 3945 source9 += pitch; 3946 sourceA += pitch; 3947 sourceB += pitch; 3948 sourceC += pitch; 3949 sourceD += pitch; 3950 sourceE += pitch; 3951 sourceF += pitch; 3952 } 3953 } 3954 else ASSERT(false); 3955 } 3956 else 3957 #endif 3958 { 3959 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 3960 3961 if(internal.depth == 2) 3962 { 3963 for(int y = 0; y < height; y++) 3964 { 3965 for(int x = 0; x < width; x++) 3966 { 3967 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3968 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3969 3970 c0 = AVERAGE(c0, c1); 3971 3972 *(unsigned int*)(source0 + 4 * x) = c0; 3973 } 3974 3975 source0 += pitch; 3976 source1 += pitch; 3977 } 3978 } 3979 else if(internal.depth == 4) 3980 { 3981 for(int y = 0; y < height; y++) 3982 { 3983 for(int x = 0; x < width; x++) 3984 { 3985 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 
3986 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3987 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3988 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3989 3990 c0 = AVERAGE(c0, c1); 3991 c2 = AVERAGE(c2, c3); 3992 c0 = AVERAGE(c0, c2); 3993 3994 *(unsigned int*)(source0 + 4 * x) = c0; 3995 } 3996 3997 source0 += pitch; 3998 source1 += pitch; 3999 source2 += pitch; 4000 source3 += pitch; 4001 } 4002 } 4003 else if(internal.depth == 8) 4004 { 4005 for(int y = 0; y < height; y++) 4006 { 4007 for(int x = 0; x < width; x++) 4008 { 4009 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4010 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4011 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4012 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4013 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4014 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4015 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4016 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4017 4018 c0 = AVERAGE(c0, c1); 4019 c2 = AVERAGE(c2, c3); 4020 c4 = AVERAGE(c4, c5); 4021 c6 = AVERAGE(c6, c7); 4022 c0 = AVERAGE(c0, c2); 4023 c4 = AVERAGE(c4, c6); 4024 c0 = AVERAGE(c0, c4); 4025 4026 *(unsigned int*)(source0 + 4 * x) = c0; 4027 } 4028 4029 source0 += pitch; 4030 source1 += pitch; 4031 source2 += pitch; 4032 source3 += pitch; 4033 source4 += pitch; 4034 source5 += pitch; 4035 source6 += pitch; 4036 source7 += pitch; 4037 } 4038 } 4039 else if(internal.depth == 16) 4040 { 4041 for(int y = 0; y < height; y++) 4042 { 4043 for(int x = 0; x < width; x++) 4044 { 4045 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4046 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4047 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4048 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4049 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4050 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4051 unsigned int c6 = *(unsigned int*)(source6 
+ 4 * x); 4052 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4053 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4054 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4055 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4056 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4057 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4058 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4059 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4060 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4061 4062 c0 = AVERAGE(c0, c1); 4063 c2 = AVERAGE(c2, c3); 4064 c4 = AVERAGE(c4, c5); 4065 c6 = AVERAGE(c6, c7); 4066 c8 = AVERAGE(c8, c9); 4067 cA = AVERAGE(cA, cB); 4068 cC = AVERAGE(cC, cD); 4069 cE = AVERAGE(cE, cF); 4070 c0 = AVERAGE(c0, c2); 4071 c4 = AVERAGE(c4, c6); 4072 c8 = AVERAGE(c8, cA); 4073 cC = AVERAGE(cC, cE); 4074 c0 = AVERAGE(c0, c4); 4075 c8 = AVERAGE(c8, cC); 4076 c0 = AVERAGE(c0, c8); 4077 4078 *(unsigned int*)(source0 + 4 * x) = c0; 4079 } 4080 4081 source0 += pitch; 4082 source1 += pitch; 4083 source2 += pitch; 4084 source3 += pitch; 4085 source4 += pitch; 4086 source5 += pitch; 4087 source6 += pitch; 4088 source7 += pitch; 4089 source8 += pitch; 4090 source9 += pitch; 4091 sourceA += pitch; 4092 sourceB += pitch; 4093 sourceC += pitch; 4094 sourceD += pitch; 4095 sourceE += pitch; 4096 sourceF += pitch; 4097 } 4098 } 4099 else ASSERT(false); 4100 4101 #undef AVERAGE 4102 } 4103 } 4104 else if(internal.format == FORMAT_G16R16) 4105 { 4106 4107 #if defined(__i386__) || defined(__x86_64__) 4108 if(CPUID::supportsSSE2() && (width % 4) == 0) 4109 { 4110 if(internal.depth == 2) 4111 { 4112 for(int y = 0; y < height; y++) 4113 { 4114 for(int x = 0; x < width; x += 4) 4115 { 4116 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4117 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4118 4119 c0 = _mm_avg_epu16(c0, c1); 4120 4121 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4122 } 4123 4124 source0 += 
pitch; 4125 source1 += pitch; 4126 } 4127 } 4128 else if(internal.depth == 4) 4129 { 4130 for(int y = 0; y < height; y++) 4131 { 4132 for(int x = 0; x < width; x += 4) 4133 { 4134 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4135 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4136 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4137 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4138 4139 c0 = _mm_avg_epu16(c0, c1); 4140 c2 = _mm_avg_epu16(c2, c3); 4141 c0 = _mm_avg_epu16(c0, c2); 4142 4143 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4144 } 4145 4146 source0 += pitch; 4147 source1 += pitch; 4148 source2 += pitch; 4149 source3 += pitch; 4150 } 4151 } 4152 else if(internal.depth == 8) 4153 { 4154 for(int y = 0; y < height; y++) 4155 { 4156 for(int x = 0; x < width; x += 4) 4157 { 4158 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4159 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4160 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4161 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4162 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4163 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4164 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4165 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4166 4167 c0 = _mm_avg_epu16(c0, c1); 4168 c2 = _mm_avg_epu16(c2, c3); 4169 c4 = _mm_avg_epu16(c4, c5); 4170 c6 = _mm_avg_epu16(c6, c7); 4171 c0 = _mm_avg_epu16(c0, c2); 4172 c4 = _mm_avg_epu16(c4, c6); 4173 c0 = _mm_avg_epu16(c0, c4); 4174 4175 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4176 } 4177 4178 source0 += pitch; 4179 source1 += pitch; 4180 source2 += pitch; 4181 source3 += pitch; 4182 source4 += pitch; 4183 source5 += pitch; 4184 source6 += pitch; 4185 source7 += pitch; 4186 } 4187 } 4188 else if(internal.depth == 16) 4189 { 4190 for(int y = 0; y < height; y++) 4191 { 4192 for(int x = 0; x < width; x += 4) 4193 { 4194 __m128i c0 = 
_mm_load_si128((__m128i*)(source0 + 4 * x)); 4195 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4196 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4197 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4198 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4199 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4200 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4201 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4202 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4203 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4204 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4205 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4206 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4207 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4208 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4209 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4210 4211 c0 = _mm_avg_epu16(c0, c1); 4212 c2 = _mm_avg_epu16(c2, c3); 4213 c4 = _mm_avg_epu16(c4, c5); 4214 c6 = _mm_avg_epu16(c6, c7); 4215 c8 = _mm_avg_epu16(c8, c9); 4216 cA = _mm_avg_epu16(cA, cB); 4217 cC = _mm_avg_epu16(cC, cD); 4218 cE = _mm_avg_epu16(cE, cF); 4219 c0 = _mm_avg_epu16(c0, c2); 4220 c4 = _mm_avg_epu16(c4, c6); 4221 c8 = _mm_avg_epu16(c8, cA); 4222 cC = _mm_avg_epu16(cC, cE); 4223 c0 = _mm_avg_epu16(c0, c4); 4224 c8 = _mm_avg_epu16(c8, cC); 4225 c0 = _mm_avg_epu16(c0, c8); 4226 4227 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4228 } 4229 4230 source0 += pitch; 4231 source1 += pitch; 4232 source2 += pitch; 4233 source3 += pitch; 4234 source4 += pitch; 4235 source5 += pitch; 4236 source6 += pitch; 4237 source7 += pitch; 4238 source8 += pitch; 4239 source9 += pitch; 4240 sourceA += pitch; 4241 sourceB += pitch; 4242 sourceC += pitch; 4243 sourceD += pitch; 4244 sourceE += pitch; 4245 sourceF += pitch; 4246 } 4247 } 4248 else ASSERT(false); 4249 } 4250 else 4251 #endif 
4252 { 4253 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4254 4255 if(internal.depth == 2) 4256 { 4257 for(int y = 0; y < height; y++) 4258 { 4259 for(int x = 0; x < width; x++) 4260 { 4261 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4262 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4263 4264 c0 = AVERAGE(c0, c1); 4265 4266 *(unsigned int*)(source0 + 4 * x) = c0; 4267 } 4268 4269 source0 += pitch; 4270 source1 += pitch; 4271 } 4272 } 4273 else if(internal.depth == 4) 4274 { 4275 for(int y = 0; y < height; y++) 4276 { 4277 for(int x = 0; x < width; x++) 4278 { 4279 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4280 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4281 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4282 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4283 4284 c0 = AVERAGE(c0, c1); 4285 c2 = AVERAGE(c2, c3); 4286 c0 = AVERAGE(c0, c2); 4287 4288 *(unsigned int*)(source0 + 4 * x) = c0; 4289 } 4290 4291 source0 += pitch; 4292 source1 += pitch; 4293 source2 += pitch; 4294 source3 += pitch; 4295 } 4296 } 4297 else if(internal.depth == 8) 4298 { 4299 for(int y = 0; y < height; y++) 4300 { 4301 for(int x = 0; x < width; x++) 4302 { 4303 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4304 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4305 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4306 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4307 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4308 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4309 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4310 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4311 4312 c0 = AVERAGE(c0, c1); 4313 c2 = AVERAGE(c2, c3); 4314 c4 = AVERAGE(c4, c5); 4315 c6 = AVERAGE(c6, c7); 4316 c0 = AVERAGE(c0, c2); 4317 c4 = AVERAGE(c4, c6); 4318 c0 = AVERAGE(c0, c4); 4319 4320 *(unsigned int*)(source0 + 4 * x) = c0; 4321 } 4322 4323 source0 += pitch; 
4324 source1 += pitch; 4325 source2 += pitch; 4326 source3 += pitch; 4327 source4 += pitch; 4328 source5 += pitch; 4329 source6 += pitch; 4330 source7 += pitch; 4331 } 4332 } 4333 else if(internal.depth == 16) 4334 { 4335 for(int y = 0; y < height; y++) 4336 { 4337 for(int x = 0; x < width; x++) 4338 { 4339 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4340 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4341 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4342 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4343 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4344 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4345 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4346 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4347 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4348 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4349 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4350 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4351 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4352 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4353 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4354 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4355 4356 c0 = AVERAGE(c0, c1); 4357 c2 = AVERAGE(c2, c3); 4358 c4 = AVERAGE(c4, c5); 4359 c6 = AVERAGE(c6, c7); 4360 c8 = AVERAGE(c8, c9); 4361 cA = AVERAGE(cA, cB); 4362 cC = AVERAGE(cC, cD); 4363 cE = AVERAGE(cE, cF); 4364 c0 = AVERAGE(c0, c2); 4365 c4 = AVERAGE(c4, c6); 4366 c8 = AVERAGE(c8, cA); 4367 cC = AVERAGE(cC, cE); 4368 c0 = AVERAGE(c0, c4); 4369 c8 = AVERAGE(c8, cC); 4370 c0 = AVERAGE(c0, c8); 4371 4372 *(unsigned int*)(source0 + 4 * x) = c0; 4373 } 4374 4375 source0 += pitch; 4376 source1 += pitch; 4377 source2 += pitch; 4378 source3 += pitch; 4379 source4 += pitch; 4380 source5 += pitch; 4381 source6 += pitch; 4382 source7 += pitch; 4383 source8 += pitch; 4384 source9 += pitch; 4385 sourceA += pitch; 4386 sourceB += pitch; 4387 sourceC += 
pitch; 4388 sourceD += pitch; 4389 sourceE += pitch; 4390 sourceF += pitch; 4391 } 4392 } 4393 else ASSERT(false); 4394 4395 #undef AVERAGE 4396 } 4397 } 4398 else if(internal.format == FORMAT_A16B16G16R16) 4399 { 4400 #if defined(__i386__) || defined(__x86_64__) 4401 if(CPUID::supportsSSE2() && (width % 2) == 0) 4402 { 4403 if(internal.depth == 2) 4404 { 4405 for(int y = 0; y < height; y++) 4406 { 4407 for(int x = 0; x < width; x += 2) 4408 { 4409 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4410 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4411 4412 c0 = _mm_avg_epu16(c0, c1); 4413 4414 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4415 } 4416 4417 source0 += pitch; 4418 source1 += pitch; 4419 } 4420 } 4421 else if(internal.depth == 4) 4422 { 4423 for(int y = 0; y < height; y++) 4424 { 4425 for(int x = 0; x < width; x += 2) 4426 { 4427 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4428 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4429 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4430 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4431 4432 c0 = _mm_avg_epu16(c0, c1); 4433 c2 = _mm_avg_epu16(c2, c3); 4434 c0 = _mm_avg_epu16(c0, c2); 4435 4436 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4437 } 4438 4439 source0 += pitch; 4440 source1 += pitch; 4441 source2 += pitch; 4442 source3 += pitch; 4443 } 4444 } 4445 else if(internal.depth == 8) 4446 { 4447 for(int y = 0; y < height; y++) 4448 { 4449 for(int x = 0; x < width; x += 2) 4450 { 4451 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4452 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4453 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4454 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4455 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4456 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4457 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4458 __m128i c7 = 
_mm_load_si128((__m128i*)(source7 + 8 * x)); 4459 4460 c0 = _mm_avg_epu16(c0, c1); 4461 c2 = _mm_avg_epu16(c2, c3); 4462 c4 = _mm_avg_epu16(c4, c5); 4463 c6 = _mm_avg_epu16(c6, c7); 4464 c0 = _mm_avg_epu16(c0, c2); 4465 c4 = _mm_avg_epu16(c4, c6); 4466 c0 = _mm_avg_epu16(c0, c4); 4467 4468 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4469 } 4470 4471 source0 += pitch; 4472 source1 += pitch; 4473 source2 += pitch; 4474 source3 += pitch; 4475 source4 += pitch; 4476 source5 += pitch; 4477 source6 += pitch; 4478 source7 += pitch; 4479 } 4480 } 4481 else if(internal.depth == 16) 4482 { 4483 for(int y = 0; y < height; y++) 4484 { 4485 for(int x = 0; x < width; x += 2) 4486 { 4487 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4488 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4489 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4490 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4491 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4492 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4493 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4494 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4495 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4496 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4497 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4498 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4499 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4500 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4501 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4502 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4503 4504 c0 = _mm_avg_epu16(c0, c1); 4505 c2 = _mm_avg_epu16(c2, c3); 4506 c4 = _mm_avg_epu16(c4, c5); 4507 c6 = _mm_avg_epu16(c6, c7); 4508 c8 = _mm_avg_epu16(c8, c9); 4509 cA = _mm_avg_epu16(cA, cB); 4510 cC = _mm_avg_epu16(cC, cD); 4511 cE = _mm_avg_epu16(cE, cF); 4512 c0 = _mm_avg_epu16(c0, c2); 4513 
c4 = _mm_avg_epu16(c4, c6); 4514 c8 = _mm_avg_epu16(c8, cA); 4515 cC = _mm_avg_epu16(cC, cE); 4516 c0 = _mm_avg_epu16(c0, c4); 4517 c8 = _mm_avg_epu16(c8, cC); 4518 c0 = _mm_avg_epu16(c0, c8); 4519 4520 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4521 } 4522 4523 source0 += pitch; 4524 source1 += pitch; 4525 source2 += pitch; 4526 source3 += pitch; 4527 source4 += pitch; 4528 source5 += pitch; 4529 source6 += pitch; 4530 source7 += pitch; 4531 source8 += pitch; 4532 source9 += pitch; 4533 sourceA += pitch; 4534 sourceB += pitch; 4535 sourceC += pitch; 4536 sourceD += pitch; 4537 sourceE += pitch; 4538 sourceF += pitch; 4539 } 4540 } 4541 else ASSERT(false); 4542 } 4543 else 4544 #endif 4545 { 4546 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4547 4548 if(internal.depth == 2) 4549 { 4550 for(int y = 0; y < height; y++) 4551 { 4552 for(int x = 0; x < 2 * width; x++) 4553 { 4554 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4555 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4556 4557 c0 = AVERAGE(c0, c1); 4558 4559 *(unsigned int*)(source0 + 4 * x) = c0; 4560 } 4561 4562 source0 += pitch; 4563 source1 += pitch; 4564 } 4565 } 4566 else if(internal.depth == 4) 4567 { 4568 for(int y = 0; y < height; y++) 4569 { 4570 for(int x = 0; x < 2 * width; x++) 4571 { 4572 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4573 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4574 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4575 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4576 4577 c0 = AVERAGE(c0, c1); 4578 c2 = AVERAGE(c2, c3); 4579 c0 = AVERAGE(c0, c2); 4580 4581 *(unsigned int*)(source0 + 4 * x) = c0; 4582 } 4583 4584 source0 += pitch; 4585 source1 += pitch; 4586 source2 += pitch; 4587 source3 += pitch; 4588 } 4589 } 4590 else if(internal.depth == 8) 4591 { 4592 for(int y = 0; y < height; y++) 4593 { 4594 for(int x = 0; x < 2 * width; x++) 4595 { 4596 unsigned int c0 = *(unsigned 
int*)(source0 + 4 * x); 4597 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4598 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4599 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4600 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4601 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4602 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4603 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4604 4605 c0 = AVERAGE(c0, c1); 4606 c2 = AVERAGE(c2, c3); 4607 c4 = AVERAGE(c4, c5); 4608 c6 = AVERAGE(c6, c7); 4609 c0 = AVERAGE(c0, c2); 4610 c4 = AVERAGE(c4, c6); 4611 c0 = AVERAGE(c0, c4); 4612 4613 *(unsigned int*)(source0 + 4 * x) = c0; 4614 } 4615 4616 source0 += pitch; 4617 source1 += pitch; 4618 source2 += pitch; 4619 source3 += pitch; 4620 source4 += pitch; 4621 source5 += pitch; 4622 source6 += pitch; 4623 source7 += pitch; 4624 } 4625 } 4626 else if(internal.depth == 16) 4627 { 4628 for(int y = 0; y < height; y++) 4629 { 4630 for(int x = 0; x < 2 * width; x++) 4631 { 4632 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4633 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4634 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4635 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4636 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4637 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4638 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4639 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4640 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4641 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4642 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4643 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4644 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4645 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4646 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4647 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4648 4649 c0 = AVERAGE(c0, c1); 4650 c2 = AVERAGE(c2, 
c3); 4651 c4 = AVERAGE(c4, c5); 4652 c6 = AVERAGE(c6, c7); 4653 c8 = AVERAGE(c8, c9); 4654 cA = AVERAGE(cA, cB); 4655 cC = AVERAGE(cC, cD); 4656 cE = AVERAGE(cE, cF); 4657 c0 = AVERAGE(c0, c2); 4658 c4 = AVERAGE(c4, c6); 4659 c8 = AVERAGE(c8, cA); 4660 cC = AVERAGE(cC, cE); 4661 c0 = AVERAGE(c0, c4); 4662 c8 = AVERAGE(c8, cC); 4663 c0 = AVERAGE(c0, c8); 4664 4665 *(unsigned int*)(source0 + 4 * x) = c0; 4666 } 4667 4668 source0 += pitch; 4669 source1 += pitch; 4670 source2 += pitch; 4671 source3 += pitch; 4672 source4 += pitch; 4673 source5 += pitch; 4674 source6 += pitch; 4675 source7 += pitch; 4676 source8 += pitch; 4677 source9 += pitch; 4678 sourceA += pitch; 4679 sourceB += pitch; 4680 sourceC += pitch; 4681 sourceD += pitch; 4682 sourceE += pitch; 4683 sourceF += pitch; 4684 } 4685 } 4686 else ASSERT(false); 4687 4688 #undef AVERAGE 4689 } 4690 } 4691 else if(internal.format == FORMAT_R32F) 4692 { 4693 #if defined(__i386__) || defined(__x86_64__) 4694 if(CPUID::supportsSSE() && (width % 4) == 0) 4695 { 4696 if(internal.depth == 2) 4697 { 4698 for(int y = 0; y < height; y++) 4699 { 4700 for(int x = 0; x < width; x += 4) 4701 { 4702 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4703 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4704 4705 c0 = _mm_add_ps(c0, c1); 4706 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4707 4708 _mm_store_ps((float*)(source0 + 4 * x), c0); 4709 } 4710 4711 source0 += pitch; 4712 source1 += pitch; 4713 } 4714 } 4715 else if(internal.depth == 4) 4716 { 4717 for(int y = 0; y < height; y++) 4718 { 4719 for(int x = 0; x < width; x += 4) 4720 { 4721 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4722 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4723 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4724 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4725 4726 c0 = _mm_add_ps(c0, c1); 4727 c2 = _mm_add_ps(c2, c3); 4728 c0 = _mm_add_ps(c0, c2); 4729 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4730 4731 
_mm_store_ps((float*)(source0 + 4 * x), c0); 4732 } 4733 4734 source0 += pitch; 4735 source1 += pitch; 4736 source2 += pitch; 4737 source3 += pitch; 4738 } 4739 } 4740 else if(internal.depth == 8) 4741 { 4742 for(int y = 0; y < height; y++) 4743 { 4744 for(int x = 0; x < width; x += 4) 4745 { 4746 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4747 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4748 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4749 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4750 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4751 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4752 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4753 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4754 4755 c0 = _mm_add_ps(c0, c1); 4756 c2 = _mm_add_ps(c2, c3); 4757 c4 = _mm_add_ps(c4, c5); 4758 c6 = _mm_add_ps(c6, c7); 4759 c0 = _mm_add_ps(c0, c2); 4760 c4 = _mm_add_ps(c4, c6); 4761 c0 = _mm_add_ps(c0, c4); 4762 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4763 4764 _mm_store_ps((float*)(source0 + 4 * x), c0); 4765 } 4766 4767 source0 += pitch; 4768 source1 += pitch; 4769 source2 += pitch; 4770 source3 += pitch; 4771 source4 += pitch; 4772 source5 += pitch; 4773 source6 += pitch; 4774 source7 += pitch; 4775 } 4776 } 4777 else if(internal.depth == 16) 4778 { 4779 for(int y = 0; y < height; y++) 4780 { 4781 for(int x = 0; x < width; x += 4) 4782 { 4783 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4784 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4785 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4786 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4787 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4788 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4789 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4790 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4791 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 4792 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 4793 __m128 cA 
= _mm_load_ps((float*)(sourceA + 4 * x)); 4794 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 4795 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 4796 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 4797 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 4798 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 4799 4800 c0 = _mm_add_ps(c0, c1); 4801 c2 = _mm_add_ps(c2, c3); 4802 c4 = _mm_add_ps(c4, c5); 4803 c6 = _mm_add_ps(c6, c7); 4804 c8 = _mm_add_ps(c8, c9); 4805 cA = _mm_add_ps(cA, cB); 4806 cC = _mm_add_ps(cC, cD); 4807 cE = _mm_add_ps(cE, cF); 4808 c0 = _mm_add_ps(c0, c2); 4809 c4 = _mm_add_ps(c4, c6); 4810 c8 = _mm_add_ps(c8, cA); 4811 cC = _mm_add_ps(cC, cE); 4812 c0 = _mm_add_ps(c0, c4); 4813 c8 = _mm_add_ps(c8, cC); 4814 c0 = _mm_add_ps(c0, c8); 4815 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 4816 4817 _mm_store_ps((float*)(source0 + 4 * x), c0); 4818 } 4819 4820 source0 += pitch; 4821 source1 += pitch; 4822 source2 += pitch; 4823 source3 += pitch; 4824 source4 += pitch; 4825 source5 += pitch; 4826 source6 += pitch; 4827 source7 += pitch; 4828 source8 += pitch; 4829 source9 += pitch; 4830 sourceA += pitch; 4831 sourceB += pitch; 4832 sourceC += pitch; 4833 sourceD += pitch; 4834 sourceE += pitch; 4835 sourceF += pitch; 4836 } 4837 } 4838 else ASSERT(false); 4839 } 4840 else 4841 #endif 4842 { 4843 if(internal.depth == 2) 4844 { 4845 for(int y = 0; y < height; y++) 4846 { 4847 for(int x = 0; x < width; x++) 4848 { 4849 float c0 = *(float*)(source0 + 4 * x); 4850 float c1 = *(float*)(source1 + 4 * x); 4851 4852 c0 = c0 + c1; 4853 c0 *= 1.0f / 2.0f; 4854 4855 *(float*)(source0 + 4 * x) = c0; 4856 } 4857 4858 source0 += pitch; 4859 source1 += pitch; 4860 } 4861 } 4862 else if(internal.depth == 4) 4863 { 4864 for(int y = 0; y < height; y++) 4865 { 4866 for(int x = 0; x < width; x++) 4867 { 4868 float c0 = *(float*)(source0 + 4 * x); 4869 float c1 = *(float*)(source1 + 4 * x); 4870 float c2 = *(float*)(source2 + 4 * x); 4871 float c3 = 
*(float*)(source3 + 4 * x); 4872 4873 c0 = c0 + c1; 4874 c2 = c2 + c3; 4875 c0 = c0 + c2; 4876 c0 *= 1.0f / 4.0f; 4877 4878 *(float*)(source0 + 4 * x) = c0; 4879 } 4880 4881 source0 += pitch; 4882 source1 += pitch; 4883 source2 += pitch; 4884 source3 += pitch; 4885 } 4886 } 4887 else if(internal.depth == 8) 4888 { 4889 for(int y = 0; y < height; y++) 4890 { 4891 for(int x = 0; x < width; x++) 4892 { 4893 float c0 = *(float*)(source0 + 4 * x); 4894 float c1 = *(float*)(source1 + 4 * x); 4895 float c2 = *(float*)(source2 + 4 * x); 4896 float c3 = *(float*)(source3 + 4 * x); 4897 float c4 = *(float*)(source4 + 4 * x); 4898 float c5 = *(float*)(source5 + 4 * x); 4899 float c6 = *(float*)(source6 + 4 * x); 4900 float c7 = *(float*)(source7 + 4 * x); 4901 4902 c0 = c0 + c1; 4903 c2 = c2 + c3; 4904 c4 = c4 + c5; 4905 c6 = c6 + c7; 4906 c0 = c0 + c2; 4907 c4 = c4 + c6; 4908 c0 = c0 + c4; 4909 c0 *= 1.0f / 8.0f; 4910 4911 *(float*)(source0 + 4 * x) = c0; 4912 } 4913 4914 source0 += pitch; 4915 source1 += pitch; 4916 source2 += pitch; 4917 source3 += pitch; 4918 source4 += pitch; 4919 source5 += pitch; 4920 source6 += pitch; 4921 source7 += pitch; 4922 } 4923 } 4924 else if(internal.depth == 16) 4925 { 4926 for(int y = 0; y < height; y++) 4927 { 4928 for(int x = 0; x < width; x++) 4929 { 4930 float c0 = *(float*)(source0 + 4 * x); 4931 float c1 = *(float*)(source1 + 4 * x); 4932 float c2 = *(float*)(source2 + 4 * x); 4933 float c3 = *(float*)(source3 + 4 * x); 4934 float c4 = *(float*)(source4 + 4 * x); 4935 float c5 = *(float*)(source5 + 4 * x); 4936 float c6 = *(float*)(source6 + 4 * x); 4937 float c7 = *(float*)(source7 + 4 * x); 4938 float c8 = *(float*)(source8 + 4 * x); 4939 float c9 = *(float*)(source9 + 4 * x); 4940 float cA = *(float*)(sourceA + 4 * x); 4941 float cB = *(float*)(sourceB + 4 * x); 4942 float cC = *(float*)(sourceC + 4 * x); 4943 float cD = *(float*)(sourceD + 4 * x); 4944 float cE = *(float*)(sourceE + 4 * x); 4945 float cF = *(float*)(sourceF + 4 * 
x); 4946 4947 c0 = c0 + c1; 4948 c2 = c2 + c3; 4949 c4 = c4 + c5; 4950 c6 = c6 + c7; 4951 c8 = c8 + c9; 4952 cA = cA + cB; 4953 cC = cC + cD; 4954 cE = cE + cF; 4955 c0 = c0 + c2; 4956 c4 = c4 + c6; 4957 c8 = c8 + cA; 4958 cC = cC + cE; 4959 c0 = c0 + c4; 4960 c8 = c8 + cC; 4961 c0 = c0 + c8; 4962 c0 *= 1.0f / 16.0f; 4963 4964 *(float*)(source0 + 4 * x) = c0; 4965 } 4966 4967 source0 += pitch; 4968 source1 += pitch; 4969 source2 += pitch; 4970 source3 += pitch; 4971 source4 += pitch; 4972 source5 += pitch; 4973 source6 += pitch; 4974 source7 += pitch; 4975 source8 += pitch; 4976 source9 += pitch; 4977 sourceA += pitch; 4978 sourceB += pitch; 4979 sourceC += pitch; 4980 sourceD += pitch; 4981 sourceE += pitch; 4982 sourceF += pitch; 4983 } 4984 } 4985 else ASSERT(false); 4986 } 4987 } 4988 else if(internal.format == FORMAT_G32R32F) 4989 { 4990 #if defined(__i386__) || defined(__x86_64__) 4991 if(CPUID::supportsSSE() && (width % 2) == 0) 4992 { 4993 if(internal.depth == 2) 4994 { 4995 for(int y = 0; y < height; y++) 4996 { 4997 for(int x = 0; x < width; x += 2) 4998 { 4999 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5000 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5001 5002 c0 = _mm_add_ps(c0, c1); 5003 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5004 5005 _mm_store_ps((float*)(source0 + 8 * x), c0); 5006 } 5007 5008 source0 += pitch; 5009 source1 += pitch; 5010 } 5011 } 5012 else if(internal.depth == 4) 5013 { 5014 for(int y = 0; y < height; y++) 5015 { 5016 for(int x = 0; x < width; x += 2) 5017 { 5018 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5019 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5020 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5021 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5022 5023 c0 = _mm_add_ps(c0, c1); 5024 c2 = _mm_add_ps(c2, c3); 5025 c0 = _mm_add_ps(c0, c2); 5026 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5027 5028 _mm_store_ps((float*)(source0 + 8 * x), c0); 5029 } 5030 5031 source0 += 
pitch; 5032 source1 += pitch; 5033 source2 += pitch; 5034 source3 += pitch; 5035 } 5036 } 5037 else if(internal.depth == 8) 5038 { 5039 for(int y = 0; y < height; y++) 5040 { 5041 for(int x = 0; x < width; x += 2) 5042 { 5043 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5044 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5045 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5046 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5047 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5048 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5049 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5050 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5051 5052 c0 = _mm_add_ps(c0, c1); 5053 c2 = _mm_add_ps(c2, c3); 5054 c4 = _mm_add_ps(c4, c5); 5055 c6 = _mm_add_ps(c6, c7); 5056 c0 = _mm_add_ps(c0, c2); 5057 c4 = _mm_add_ps(c4, c6); 5058 c0 = _mm_add_ps(c0, c4); 5059 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5060 5061 _mm_store_ps((float*)(source0 + 8 * x), c0); 5062 } 5063 5064 source0 += pitch; 5065 source1 += pitch; 5066 source2 += pitch; 5067 source3 += pitch; 5068 source4 += pitch; 5069 source5 += pitch; 5070 source6 += pitch; 5071 source7 += pitch; 5072 } 5073 } 5074 else if(internal.depth == 16) 5075 { 5076 for(int y = 0; y < height; y++) 5077 { 5078 for(int x = 0; x < width; x += 2) 5079 { 5080 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5081 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5082 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5083 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5084 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5085 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5086 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5087 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5088 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x)); 5089 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 5090 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 5091 __m128 cB = 
_mm_load_ps((float*)(sourceB + 8 * x)); 5092 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 5093 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 5094 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 5095 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 5096 5097 c0 = _mm_add_ps(c0, c1); 5098 c2 = _mm_add_ps(c2, c3); 5099 c4 = _mm_add_ps(c4, c5); 5100 c6 = _mm_add_ps(c6, c7); 5101 c8 = _mm_add_ps(c8, c9); 5102 cA = _mm_add_ps(cA, cB); 5103 cC = _mm_add_ps(cC, cD); 5104 cE = _mm_add_ps(cE, cF); 5105 c0 = _mm_add_ps(c0, c2); 5106 c4 = _mm_add_ps(c4, c6); 5107 c8 = _mm_add_ps(c8, cA); 5108 cC = _mm_add_ps(cC, cE); 5109 c0 = _mm_add_ps(c0, c4); 5110 c8 = _mm_add_ps(c8, cC); 5111 c0 = _mm_add_ps(c0, c8); 5112 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5113 5114 _mm_store_ps((float*)(source0 + 8 * x), c0); 5115 } 5116 5117 source0 += pitch; 5118 source1 += pitch; 5119 source2 += pitch; 5120 source3 += pitch; 5121 source4 += pitch; 5122 source5 += pitch; 5123 source6 += pitch; 5124 source7 += pitch; 5125 source8 += pitch; 5126 source9 += pitch; 5127 sourceA += pitch; 5128 sourceB += pitch; 5129 sourceC += pitch; 5130 sourceD += pitch; 5131 sourceE += pitch; 5132 sourceF += pitch; 5133 } 5134 } 5135 else ASSERT(false); 5136 } 5137 else 5138 #endif 5139 { 5140 if(internal.depth == 2) 5141 { 5142 for(int y = 0; y < height; y++) 5143 { 5144 for(int x = 0; x < 2 * width; x++) 5145 { 5146 float c0 = *(float*)(source0 + 4 * x); 5147 float c1 = *(float*)(source1 + 4 * x); 5148 5149 c0 = c0 + c1; 5150 c0 *= 1.0f / 2.0f; 5151 5152 *(float*)(source0 + 4 * x) = c0; 5153 } 5154 5155 source0 += pitch; 5156 source1 += pitch; 5157 } 5158 } 5159 else if(internal.depth == 4) 5160 { 5161 for(int y = 0; y < height; y++) 5162 { 5163 for(int x = 0; x < 2 * width; x++) 5164 { 5165 float c0 = *(float*)(source0 + 4 * x); 5166 float c1 = *(float*)(source1 + 4 * x); 5167 float c2 = *(float*)(source2 + 4 * x); 5168 float c3 = *(float*)(source3 + 4 * x); 5169 5170 c0 = c0 + c1; 
5171 c2 = c2 + c3; 5172 c0 = c0 + c2; 5173 c0 *= 1.0f / 4.0f; 5174 5175 *(float*)(source0 + 4 * x) = c0; 5176 } 5177 5178 source0 += pitch; 5179 source1 += pitch; 5180 source2 += pitch; 5181 source3 += pitch; 5182 } 5183 } 5184 else if(internal.depth == 8) 5185 { 5186 for(int y = 0; y < height; y++) 5187 { 5188 for(int x = 0; x < 2 * width; x++) 5189 { 5190 float c0 = *(float*)(source0 + 4 * x); 5191 float c1 = *(float*)(source1 + 4 * x); 5192 float c2 = *(float*)(source2 + 4 * x); 5193 float c3 = *(float*)(source3 + 4 * x); 5194 float c4 = *(float*)(source4 + 4 * x); 5195 float c5 = *(float*)(source5 + 4 * x); 5196 float c6 = *(float*)(source6 + 4 * x); 5197 float c7 = *(float*)(source7 + 4 * x); 5198 5199 c0 = c0 + c1; 5200 c2 = c2 + c3; 5201 c4 = c4 + c5; 5202 c6 = c6 + c7; 5203 c0 = c0 + c2; 5204 c4 = c4 + c6; 5205 c0 = c0 + c4; 5206 c0 *= 1.0f / 8.0f; 5207 5208 *(float*)(source0 + 4 * x) = c0; 5209 } 5210 5211 source0 += pitch; 5212 source1 += pitch; 5213 source2 += pitch; 5214 source3 += pitch; 5215 source4 += pitch; 5216 source5 += pitch; 5217 source6 += pitch; 5218 source7 += pitch; 5219 } 5220 } 5221 else if(internal.depth == 16) 5222 { 5223 for(int y = 0; y < height; y++) 5224 { 5225 for(int x = 0; x < 2 * width; x++) 5226 { 5227 float c0 = *(float*)(source0 + 4 * x); 5228 float c1 = *(float*)(source1 + 4 * x); 5229 float c2 = *(float*)(source2 + 4 * x); 5230 float c3 = *(float*)(source3 + 4 * x); 5231 float c4 = *(float*)(source4 + 4 * x); 5232 float c5 = *(float*)(source5 + 4 * x); 5233 float c6 = *(float*)(source6 + 4 * x); 5234 float c7 = *(float*)(source7 + 4 * x); 5235 float c8 = *(float*)(source8 + 4 * x); 5236 float c9 = *(float*)(source9 + 4 * x); 5237 float cA = *(float*)(sourceA + 4 * x); 5238 float cB = *(float*)(sourceB + 4 * x); 5239 float cC = *(float*)(sourceC + 4 * x); 5240 float cD = *(float*)(sourceD + 4 * x); 5241 float cE = *(float*)(sourceE + 4 * x); 5242 float cF = *(float*)(sourceF + 4 * x); 5243 5244 c0 = c0 + c1; 5245 c2 = c2 + 
c3; 5246 c4 = c4 + c5; 5247 c6 = c6 + c7; 5248 c8 = c8 + c9; 5249 cA = cA + cB; 5250 cC = cC + cD; 5251 cE = cE + cF; 5252 c0 = c0 + c2; 5253 c4 = c4 + c6; 5254 c8 = c8 + cA; 5255 cC = cC + cE; 5256 c0 = c0 + c4; 5257 c8 = c8 + cC; 5258 c0 = c0 + c8; 5259 c0 *= 1.0f / 16.0f; 5260 5261 *(float*)(source0 + 4 * x) = c0; 5262 } 5263 5264 source0 += pitch; 5265 source1 += pitch; 5266 source2 += pitch; 5267 source3 += pitch; 5268 source4 += pitch; 5269 source5 += pitch; 5270 source6 += pitch; 5271 source7 += pitch; 5272 source8 += pitch; 5273 source9 += pitch; 5274 sourceA += pitch; 5275 sourceB += pitch; 5276 sourceC += pitch; 5277 sourceD += pitch; 5278 sourceE += pitch; 5279 sourceF += pitch; 5280 } 5281 } 5282 else ASSERT(false); 5283 } 5284 } 5285 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F) 5286 { 5287 #if defined(__i386__) || defined(__x86_64__) 5288 if(CPUID::supportsSSE()) 5289 { 5290 if(internal.depth == 2) 5291 { 5292 for(int y = 0; y < height; y++) 5293 { 5294 for(int x = 0; x < width; x++) 5295 { 5296 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5297 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5298 5299 c0 = _mm_add_ps(c0, c1); 5300 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5301 5302 _mm_store_ps((float*)(source0 + 16 * x), c0); 5303 } 5304 5305 source0 += pitch; 5306 source1 += pitch; 5307 } 5308 } 5309 else if(internal.depth == 4) 5310 { 5311 for(int y = 0; y < height; y++) 5312 { 5313 for(int x = 0; x < width; x++) 5314 { 5315 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5316 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5317 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5318 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5319 5320 c0 = _mm_add_ps(c0, c1); 5321 c2 = _mm_add_ps(c2, c3); 5322 c0 = _mm_add_ps(c0, c2); 5323 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5324 5325 _mm_store_ps((float*)(source0 + 16 * x), c0); 5326 } 5327 5328 source0 += pitch; 5329 
source1 += pitch; 5330 source2 += pitch; 5331 source3 += pitch; 5332 } 5333 } 5334 else if(internal.depth == 8) 5335 { 5336 for(int y = 0; y < height; y++) 5337 { 5338 for(int x = 0; x < width; x++) 5339 { 5340 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5341 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5342 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5343 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5344 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5345 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5346 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5347 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5348 5349 c0 = _mm_add_ps(c0, c1); 5350 c2 = _mm_add_ps(c2, c3); 5351 c4 = _mm_add_ps(c4, c5); 5352 c6 = _mm_add_ps(c6, c7); 5353 c0 = _mm_add_ps(c0, c2); 5354 c4 = _mm_add_ps(c4, c6); 5355 c0 = _mm_add_ps(c0, c4); 5356 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5357 5358 _mm_store_ps((float*)(source0 + 16 * x), c0); 5359 } 5360 5361 source0 += pitch; 5362 source1 += pitch; 5363 source2 += pitch; 5364 source3 += pitch; 5365 source4 += pitch; 5366 source5 += pitch; 5367 source6 += pitch; 5368 source7 += pitch; 5369 } 5370 } 5371 else if(internal.depth == 16) 5372 { 5373 for(int y = 0; y < height; y++) 5374 { 5375 for(int x = 0; x < width; x++) 5376 { 5377 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5378 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5379 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5380 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5381 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5382 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5383 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5384 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5385 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5386 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 5387 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5388 __m128 cB = 
_mm_load_ps((float*)(sourceB + 16 * x)); 5389 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x)); 5390 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x)); 5391 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x)); 5392 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x)); 5393 5394 c0 = _mm_add_ps(c0, c1); 5395 c2 = _mm_add_ps(c2, c3); 5396 c4 = _mm_add_ps(c4, c5); 5397 c6 = _mm_add_ps(c6, c7); 5398 c8 = _mm_add_ps(c8, c9); 5399 cA = _mm_add_ps(cA, cB); 5400 cC = _mm_add_ps(cC, cD); 5401 cE = _mm_add_ps(cE, cF); 5402 c0 = _mm_add_ps(c0, c2); 5403 c4 = _mm_add_ps(c4, c6); 5404 c8 = _mm_add_ps(c8, cA); 5405 cC = _mm_add_ps(cC, cE); 5406 c0 = _mm_add_ps(c0, c4); 5407 c8 = _mm_add_ps(c8, cC); 5408 c0 = _mm_add_ps(c0, c8); 5409 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5410 5411 _mm_store_ps((float*)(source0 + 16 * x), c0); 5412 } 5413 5414 source0 += pitch; 5415 source1 += pitch; 5416 source2 += pitch; 5417 source3 += pitch; 5418 source4 += pitch; 5419 source5 += pitch; 5420 source6 += pitch; 5421 source7 += pitch; 5422 source8 += pitch; 5423 source9 += pitch; 5424 sourceA += pitch; 5425 sourceB += pitch; 5426 sourceC += pitch; 5427 sourceD += pitch; 5428 sourceE += pitch; 5429 sourceF += pitch; 5430 } 5431 } 5432 else ASSERT(false); 5433 } 5434 else 5435 #endif 5436 { 5437 if(internal.depth == 2) 5438 { 5439 for(int y = 0; y < height; y++) 5440 { 5441 for(int x = 0; x < 4 * width; x++) 5442 { 5443 float c0 = *(float*)(source0 + 4 * x); 5444 float c1 = *(float*)(source1 + 4 * x); 5445 5446 c0 = c0 + c1; 5447 c0 *= 1.0f / 2.0f; 5448 5449 *(float*)(source0 + 4 * x) = c0; 5450 } 5451 5452 source0 += pitch; 5453 source1 += pitch; 5454 } 5455 } 5456 else if(internal.depth == 4) 5457 { 5458 for(int y = 0; y < height; y++) 5459 { 5460 for(int x = 0; x < 4 * width; x++) 5461 { 5462 float c0 = *(float*)(source0 + 4 * x); 5463 float c1 = *(float*)(source1 + 4 * x); 5464 float c2 = *(float*)(source2 + 4 * x); 5465 float c3 = *(float*)(source3 + 4 * x); 5466 5467 c0 = c0 + 
c1; 5468 c2 = c2 + c3; 5469 c0 = c0 + c2; 5470 c0 *= 1.0f / 4.0f; 5471 5472 *(float*)(source0 + 4 * x) = c0; 5473 } 5474 5475 source0 += pitch; 5476 source1 += pitch; 5477 source2 += pitch; 5478 source3 += pitch; 5479 } 5480 } 5481 else if(internal.depth == 8) 5482 { 5483 for(int y = 0; y < height; y++) 5484 { 5485 for(int x = 0; x < 4 * width; x++) 5486 { 5487 float c0 = *(float*)(source0 + 4 * x); 5488 float c1 = *(float*)(source1 + 4 * x); 5489 float c2 = *(float*)(source2 + 4 * x); 5490 float c3 = *(float*)(source3 + 4 * x); 5491 float c4 = *(float*)(source4 + 4 * x); 5492 float c5 = *(float*)(source5 + 4 * x); 5493 float c6 = *(float*)(source6 + 4 * x); 5494 float c7 = *(float*)(source7 + 4 * x); 5495 5496 c0 = c0 + c1; 5497 c2 = c2 + c3; 5498 c4 = c4 + c5; 5499 c6 = c6 + c7; 5500 c0 = c0 + c2; 5501 c4 = c4 + c6; 5502 c0 = c0 + c4; 5503 c0 *= 1.0f / 8.0f; 5504 5505 *(float*)(source0 + 4 * x) = c0; 5506 } 5507 5508 source0 += pitch; 5509 source1 += pitch; 5510 source2 += pitch; 5511 source3 += pitch; 5512 source4 += pitch; 5513 source5 += pitch; 5514 source6 += pitch; 5515 source7 += pitch; 5516 } 5517 } 5518 else if(internal.depth == 16) 5519 { 5520 for(int y = 0; y < height; y++) 5521 { 5522 for(int x = 0; x < 4 * width; x++) 5523 { 5524 float c0 = *(float*)(source0 + 4 * x); 5525 float c1 = *(float*)(source1 + 4 * x); 5526 float c2 = *(float*)(source2 + 4 * x); 5527 float c3 = *(float*)(source3 + 4 * x); 5528 float c4 = *(float*)(source4 + 4 * x); 5529 float c5 = *(float*)(source5 + 4 * x); 5530 float c6 = *(float*)(source6 + 4 * x); 5531 float c7 = *(float*)(source7 + 4 * x); 5532 float c8 = *(float*)(source8 + 4 * x); 5533 float c9 = *(float*)(source9 + 4 * x); 5534 float cA = *(float*)(sourceA + 4 * x); 5535 float cB = *(float*)(sourceB + 4 * x); 5536 float cC = *(float*)(sourceC + 4 * x); 5537 float cD = *(float*)(sourceD + 4 * x); 5538 float cE = *(float*)(sourceE + 4 * x); 5539 float cF = *(float*)(sourceF + 4 * x); 5540 5541 c0 = c0 + c1; 5542 c2 = c2 
+ c3; 5543 c4 = c4 + c5; 5544 c6 = c6 + c7; 5545 c8 = c8 + c9; 5546 cA = cA + cB; 5547 cC = cC + cD; 5548 cE = cE + cF; 5549 c0 = c0 + c2; 5550 c4 = c4 + c6; 5551 c8 = c8 + cA; 5552 cC = cC + cE; 5553 c0 = c0 + c4; 5554 c8 = c8 + cC; 5555 c0 = c0 + c8; 5556 c0 *= 1.0f / 16.0f; 5557 5558 *(float*)(source0 + 4 * x) = c0; 5559 } 5560 5561 source0 += pitch; 5562 source1 += pitch; 5563 source2 += pitch; 5564 source3 += pitch; 5565 source4 += pitch; 5566 source5 += pitch; 5567 source6 += pitch; 5568 source7 += pitch; 5569 source8 += pitch; 5570 source9 += pitch; 5571 sourceA += pitch; 5572 sourceB += pitch; 5573 sourceC += pitch; 5574 sourceD += pitch; 5575 sourceE += pitch; 5576 sourceF += pitch; 5577 } 5578 } 5579 else ASSERT(false); 5580 } 5581 } 5582 else if(internal.format == FORMAT_R5G6B5) 5583 { 5584 #if defined(__i386__) || defined(__x86_64__) 5585 if(CPUID::supportsSSE2() && (width % 8) == 0) 5586 { 5587 if(internal.depth == 2) 5588 { 5589 for(int y = 0; y < height; y++) 5590 { 5591 for(int x = 0; x < width; x += 8) 5592 { 5593 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5594 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5595 5596 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5597 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5598 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5599 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5600 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5601 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5602 5603 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5604 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5605 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5606 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5607 c0 = _mm_or_si128(c0, c1); 5608 5609 _mm_store_si128((__m128i*)(source0 + 2 * x), 
c0); 5610 } 5611 5612 source0 += pitch; 5613 source1 += pitch; 5614 } 5615 } 5616 else if(internal.depth == 4) 5617 { 5618 for(int y = 0; y < height; y++) 5619 { 5620 for(int x = 0; x < width; x += 8) 5621 { 5622 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5623 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5624 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5625 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5626 5627 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5628 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5629 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5630 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5631 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5632 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5633 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5634 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5635 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5636 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5637 5638 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5639 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5640 c0 = _mm_avg_epu8(c0, c2); 5641 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5642 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5643 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5644 c1 = _mm_avg_epu16(c1, c3); 5645 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5646 c0 = _mm_or_si128(c0, c1); 5647 5648 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5649 } 5650 5651 source0 += pitch; 5652 source1 += pitch; 5653 source2 += pitch; 5654 source3 += pitch; 5655 } 5656 } 5657 else if(internal.depth == 8) 5658 { 5659 for(int y = 0; y < height; y++) 5660 { 5661 for(int x = 0; x < width; x += 
8) 5662 { 5663 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5664 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5665 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5666 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5667 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5668 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5669 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5670 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5671 5672 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5673 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5674 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5675 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5676 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5677 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5678 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5679 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5680 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5681 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5682 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5683 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5684 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5685 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5686 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5687 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5688 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5689 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5690 
5691 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5692 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5693 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5694 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5695 c0 = _mm_avg_epu8(c0, c2); 5696 c4 = _mm_avg_epu8(c4, c6); 5697 c0 = _mm_avg_epu8(c0, c4); 5698 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5699 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5700 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5701 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5702 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5703 c1 = _mm_avg_epu16(c1, c3); 5704 c5 = _mm_avg_epu16(c5, c7); 5705 c1 = _mm_avg_epu16(c1, c5); 5706 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5707 c0 = _mm_or_si128(c0, c1); 5708 5709 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5710 } 5711 5712 source0 += pitch; 5713 source1 += pitch; 5714 source2 += pitch; 5715 source3 += pitch; 5716 source4 += pitch; 5717 source5 += pitch; 5718 source6 += pitch; 5719 source7 += pitch; 5720 } 5721 } 5722 else if(internal.depth == 16) 5723 { 5724 for(int y = 0; y < height; y++) 5725 { 5726 for(int x = 0; x < width; x += 8) 5727 { 5728 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5729 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5730 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5731 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5732 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5733 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5734 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5735 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5736 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x)); 5737 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x)); 5738 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x)); 5739 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x)); 5740 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x)); 5741 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x)); 5742 __m128i cE = 
_mm_load_si128((__m128i*)(sourceE + 2 * x)); 5743 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x)); 5744 5745 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5746 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5747 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5748 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5749 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5750 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5751 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5752 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5753 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5754 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5755 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5756 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5757 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5758 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5759 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5760 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5761 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5762 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5763 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b)); 5764 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_)); 5765 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b)); 5766 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_)); 5767 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b)); 5768 
__m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_)); 5769 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b)); 5770 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_)); 5771 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b)); 5772 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_)); 5773 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b)); 5774 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_)); 5775 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b)); 5776 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_)); 5777 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b)); 5778 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_)); 5779 5780 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5781 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5782 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5783 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5784 c8 = _mm_avg_epu8(c8_r_b, c9_r_b); 5785 cA = _mm_avg_epu8(cA_r_b, cB_r_b); 5786 cC = _mm_avg_epu8(cC_r_b, cD_r_b); 5787 cE = _mm_avg_epu8(cE_r_b, cF_r_b); 5788 c0 = _mm_avg_epu8(c0, c2); 5789 c4 = _mm_avg_epu8(c4, c6); 5790 c8 = _mm_avg_epu8(c8, cA); 5791 cC = _mm_avg_epu8(cC, cE); 5792 c0 = _mm_avg_epu8(c0, c4); 5793 c8 = _mm_avg_epu8(c8, cC); 5794 c0 = _mm_avg_epu8(c0, c8); 5795 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5796 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5797 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5798 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5799 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5800 c9 = _mm_avg_epu16(c8__g_, c9__g_); 5801 cB = _mm_avg_epu16(cA__g_, cB__g_); 5802 cD = _mm_avg_epu16(cC__g_, cD__g_); 5803 cF = _mm_avg_epu16(cE__g_, cF__g_); 5804 c1 = _mm_avg_epu8(c1, c3); 5805 c5 = _mm_avg_epu8(c5, c7); 5806 c9 = _mm_avg_epu8(c9, cB); 5807 cD = _mm_avg_epu8(cD, cF); 5808 c1 = _mm_avg_epu8(c1, c5); 5809 c9 = 
_mm_avg_epu8(c9, cD); 5810 c1 = _mm_avg_epu8(c1, c9); 5811 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5812 c0 = _mm_or_si128(c0, c1); 5813 5814 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5815 } 5816 5817 source0 += pitch; 5818 source1 += pitch; 5819 source2 += pitch; 5820 source3 += pitch; 5821 source4 += pitch; 5822 source5 += pitch; 5823 source6 += pitch; 5824 source7 += pitch; 5825 source8 += pitch; 5826 source9 += pitch; 5827 sourceA += pitch; 5828 sourceB += pitch; 5829 sourceC += pitch; 5830 sourceD += pitch; 5831 sourceE += pitch; 5832 sourceF += pitch; 5833 } 5834 } 5835 else ASSERT(false); 5836 } 5837 else 5838 #endif 5839 { 5840 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821)) 5841 5842 if(internal.depth == 2) 5843 { 5844 for(int y = 0; y < height; y++) 5845 { 5846 for(int x = 0; x < width; x++) 5847 { 5848 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5849 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5850 5851 c0 = AVERAGE(c0, c1); 5852 5853 *(unsigned short*)(source0 + 2 * x) = c0; 5854 } 5855 5856 source0 += pitch; 5857 source1 += pitch; 5858 } 5859 } 5860 else if(internal.depth == 4) 5861 { 5862 for(int y = 0; y < height; y++) 5863 { 5864 for(int x = 0; x < width; x++) 5865 { 5866 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5867 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5868 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5869 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5870 5871 c0 = AVERAGE(c0, c1); 5872 c2 = AVERAGE(c2, c3); 5873 c0 = AVERAGE(c0, c2); 5874 5875 *(unsigned short*)(source0 + 2 * x) = c0; 5876 } 5877 5878 source0 += pitch; 5879 source1 += pitch; 5880 source2 += pitch; 5881 source3 += pitch; 5882 } 5883 } 5884 else if(internal.depth == 8) 5885 { 5886 for(int y = 0; y < height; y++) 5887 { 5888 for(int x = 0; x < width; x++) 5889 { 5890 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 
5891 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5892 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5893 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5894 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5895 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5896 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5897 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5898 5899 c0 = AVERAGE(c0, c1); 5900 c2 = AVERAGE(c2, c3); 5901 c4 = AVERAGE(c4, c5); 5902 c6 = AVERAGE(c6, c7); 5903 c0 = AVERAGE(c0, c2); 5904 c4 = AVERAGE(c4, c6); 5905 c0 = AVERAGE(c0, c4); 5906 5907 *(unsigned short*)(source0 + 2 * x) = c0; 5908 } 5909 5910 source0 += pitch; 5911 source1 += pitch; 5912 source2 += pitch; 5913 source3 += pitch; 5914 source4 += pitch; 5915 source5 += pitch; 5916 source6 += pitch; 5917 source7 += pitch; 5918 } 5919 } 5920 else if(internal.depth == 16) 5921 { 5922 for(int y = 0; y < height; y++) 5923 { 5924 for(int x = 0; x < width; x++) 5925 { 5926 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5927 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5928 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5929 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5930 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5931 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5932 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5933 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5934 unsigned short c8 = *(unsigned short*)(source8 + 2 * x); 5935 unsigned short c9 = *(unsigned short*)(source9 + 2 * x); 5936 unsigned short cA = *(unsigned short*)(sourceA + 2 * x); 5937 unsigned short cB = *(unsigned short*)(sourceB + 2 * x); 5938 unsigned short cC = *(unsigned short*)(sourceC + 2 * x); 5939 unsigned short cD = *(unsigned short*)(sourceD + 2 * x); 5940 unsigned short cE = *(unsigned short*)(sourceE + 2 * x); 5941 unsigned short cF = *(unsigned 
short*)(sourceF + 2 * x); 5942 5943 c0 = AVERAGE(c0, c1); 5944 c2 = AVERAGE(c2, c3); 5945 c4 = AVERAGE(c4, c5); 5946 c6 = AVERAGE(c6, c7); 5947 c8 = AVERAGE(c8, c9); 5948 cA = AVERAGE(cA, cB); 5949 cC = AVERAGE(cC, cD); 5950 cE = AVERAGE(cE, cF); 5951 c0 = AVERAGE(c0, c2); 5952 c4 = AVERAGE(c4, c6); 5953 c8 = AVERAGE(c8, cA); 5954 cC = AVERAGE(cC, cE); 5955 c0 = AVERAGE(c0, c4); 5956 c8 = AVERAGE(c8, cC); 5957 c0 = AVERAGE(c0, c8); 5958 5959 *(unsigned short*)(source0 + 2 * x) = c0; 5960 } 5961 5962 source0 += pitch; 5963 source1 += pitch; 5964 source2 += pitch; 5965 source3 += pitch; 5966 source4 += pitch; 5967 source5 += pitch; 5968 source6 += pitch; 5969 source7 += pitch; 5970 source8 += pitch; 5971 source9 += pitch; 5972 sourceA += pitch; 5973 sourceB += pitch; 5974 sourceC += pitch; 5975 sourceD += pitch; 5976 sourceE += pitch; 5977 sourceF += pitch; 5978 } 5979 } 5980 else ASSERT(false); 5981 5982 #undef AVERAGE 5983 } 5984 } 5985 else 5986 { 5987 // UNIMPLEMENTED(); 5988 } 5989 } 5990 } 5991