// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Surface.hpp"

#include "Color.hpp"
#include "Context.hpp"
#include "ETC_Decoder.hpp"
#include "Renderer.hpp"
#include "Common/Half.hpp"
#include "Common/Memory.hpp"
#include "Common/CPUID.hpp"
#include "Common/Resource.hpp"
#include "Common/Debug.hpp"
#include "Reactor/Reactor.hpp"

#if defined(__i386__) || defined(__x86_64__)
	#include <xmmintrin.h>
	#include <emmintrin.h>
#endif

#undef min
#undef max

namespace sw
{
	extern bool quadLayoutEnabled;
	extern bool complementaryDepthBuffer;
	extern TranscendentalPrecision logPrecision;

	unsigned int *Surface::palette = 0;
	unsigned int Surface::paletteID = 0;

	void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));
		ASSERT((z >= 0) && (z < depth));

		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;

		for(int i = 0; i < samples; i++)
		{
			write(element, color);
			element += sliceB;
		}
	}

	void Surface::Buffer::write(int x, int y, const Color<float> &color)
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));

		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;

		for(int i = 0; i < samples; i++)
		{
			write(element, color);
			element += sliceB;
		}
	}

	inline void Surface::Buffer::write(void *element, const Color<float> &color)
	{
		float r = color.r;
		float g = color.g;
		float b = color.b;
		float a = color.a;

		if(isSRGBformat(format))
		{
			r = linearToSRGB(r);
			g = linearToSRGB(g);
			b = linearToSRGB(b);
		}

		switch(format)
		{
		case FORMAT_A8:
			*(unsigned char*)element = unorm<8>(a);
			break;
		case FORMAT_R8_SNORM:
			*(char*)element = snorm<8>(r);
			break;
		case FORMAT_R8:
			*(unsigned char*)element = unorm<8>(r);
			break;
		case FORMAT_R8I:
			*(char*)element = scast<8>(r);
			break;
		case FORMAT_R8UI:
			*(unsigned char*)element = ucast<8>(r);
			break;
		case FORMAT_R16I:
			*(short*)element = scast<16>(r);
			break;
		case FORMAT_R16UI:
			*(unsigned short*)element = ucast<16>(r);
			break;
		case FORMAT_R32I:
			*(int*)element = static_cast<int>(r);
			break;
		case FORMAT_R32UI:
			*(unsigned int*)element = static_cast<unsigned int>(r);
			break;
		case FORMAT_R3G3B2:
			*(unsigned char*)element = (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
			break;
		case FORMAT_A8R3G3B2:
			*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
			break;
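		// The conversion helpers used throughout this switch are assumed to behave as
		// follows: unorm<n>() and snorm<n>() quantize a normalized float to an n-bit
		// unsigned/signed fixed-point value, while ucast<n>() and scast<n>() clamp an
		// integer-valued float to the n-bit unsigned/signed range. Illustrative example
		// (not part of the original file): Color<float>(1.0f, 0.5f, 0.25f, 1.0f) written
		// as FORMAT_A8R8G8B8 yields approximately 0xFFFF8040.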
		case FORMAT_X4R4G4B4:
			*(unsigned short*)element = 0xF000 | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
			break;
		case FORMAT_A4R4G4B4:
			*(unsigned short*)element = (unorm<4>(a) << 12) | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
			break;
		case FORMAT_R4G4B4A4:
			*(unsigned short*)element = (unorm<4>(r) << 12) | (unorm<4>(g) << 8) | (unorm<4>(b) << 4) | (unorm<4>(a) << 0);
			break;
		case FORMAT_R5G6B5:
			*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<6>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_A1R5G5B5:
			*(unsigned short*)element = (unorm<1>(a) << 15) | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_R5G5B5A1:
			*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<5>(g) << 6) | (unorm<5>(b) << 1) | (unorm<5>(a) << 0);
			break;
		case FORMAT_X1R5G5B5:
			*(unsigned short*)element = 0x8000 | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_A8R8G8B8:
			*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
			break;
		case FORMAT_X8R8G8B8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
			break;
		case FORMAT_A8B8G8R8_SNORM:
			*(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(a)) << 24) |
				(static_cast<unsigned int>(snorm<8>(b)) << 16) |
				(static_cast<unsigned int>(snorm<8>(g)) << 8) |
				(static_cast<unsigned int>(snorm<8>(r)) << 0);
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_SRGB8_A8:
			*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_A8B8G8R8I:
			*(unsigned int*)element = (static_cast<unsigned int>(scast<8>(a)) << 24) |
				(static_cast<unsigned int>(scast<8>(b)) << 16) |
				(static_cast<unsigned int>(scast<8>(g)) << 8) |
				(static_cast<unsigned int>(scast<8>(r)) << 0);
			break;
		case FORMAT_A8B8G8R8UI:
			*(unsigned int*)element = (ucast<8>(a) << 24) | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_X8B8G8R8_SNORM:
			*(unsigned int*)element = 0x7F000000 |
				(static_cast<unsigned int>(snorm<8>(b)) << 16) |
				(static_cast<unsigned int>(snorm<8>(g)) << 8) |
				(static_cast<unsigned int>(snorm<8>(r)) << 0);
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_SRGB8_X8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_X8B8G8R8I:
			*(unsigned int*)element = 0x7F000000 |
				(static_cast<unsigned int>(scast<8>(b)) << 16) |
				(static_cast<unsigned int>(scast<8>(g)) << 8) |
				(static_cast<unsigned int>(scast<8>(r)) << 0);
			break;
		case FORMAT_X8B8G8R8UI:
			*(unsigned int*)element = 0xFF000000 | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_A2R10G10B10:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(r) << 20) | (unorm<10>(g) << 10) | (unorm<10>(b) << 0);
			break;
		case FORMAT_A2B10G10R10:
		case FORMAT_A2B10G10R10UI:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(b) << 20) | (unorm<10>(g) << 10) | (unorm<10>(r) << 0);
			break;
		case FORMAT_G8R8_SNORM:
			*(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(g)) << 8) |
				(static_cast<unsigned short>(snorm<8>(r)) << 0);
			break;
		case FORMAT_G8R8:
			*(unsigned short*)element = (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_G8R8I:
			*(unsigned short*)element = (static_cast<unsigned short>(scast<8>(g)) << 8) |
				(static_cast<unsigned short>(scast<8>(r)) << 0);
			break;
		case FORMAT_G8R8UI:
			*(unsigned short*)element = (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_G16R16:
			*(unsigned int*)element = (unorm<16>(g) << 16) | (unorm<16>(r) << 0);
			break;
		case FORMAT_G16R16I:
			*(unsigned int*)element = (static_cast<unsigned int>(scast<16>(g)) << 16) |
				(static_cast<unsigned int>(scast<16>(r)) << 0);
			break;
		case FORMAT_G16R16UI:
			*(unsigned int*)element = (ucast<16>(g) << 16) | (ucast<16>(r) << 0);
			break;
		case FORMAT_G32R32I:
		case FORMAT_G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			break;
		case FORMAT_A16B16G16R16:
			((unsigned short*)element)[0] = unorm<16>(r);
			((unsigned short*)element)[1] = unorm<16>(g);
			((unsigned short*)element)[2] = unorm<16>(b);
			((unsigned short*)element)[3] = unorm<16>(a);
			break;
		case FORMAT_A16B16G16R16I:
			((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
			((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(a));
			break;
		case FORMAT_A16B16G16R16UI:
			((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
			((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(a));
			break;
		case FORMAT_X16B16G16R16I:
			((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
			break;
		case FORMAT_X16B16G16R16UI:
			((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
			break;
		case FORMAT_A32B32G32R32I:
		case FORMAT_A32B32G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			((unsigned int*)element)[2] = static_cast<unsigned int>(b);
			((unsigned int*)element)[3] = static_cast<unsigned int>(a);
			break;
		case FORMAT_X32B32G32R32I:
		case FORMAT_X32B32G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			((unsigned int*)element)[2] = static_cast<unsigned int>(b);
			break;
		case FORMAT_V8U8:
			*(unsigned short*)element = (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
		case FORMAT_L6V5U5:
			*(unsigned short*)element = (unorm<6>(b) << 10) | (snorm<5>(g) << 5) | (snorm<5>(r) << 0);
			break;
		case FORMAT_Q8W8V8U8:
			*(unsigned int*)element = (snorm<8>(a) << 24) | (snorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
		case FORMAT_X8L8V8U8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
		case FORMAT_V16U16:
			*(unsigned int*)element = (snorm<16>(g) << 16) | (snorm<16>(r) << 0);
			break;
		case FORMAT_A2W10V10U10:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (snorm<10>(b) << 20) | (snorm<10>(g) << 10) | (snorm<10>(r) << 0);
			break;
		case FORMAT_A16W16V16U16:
			((unsigned short*)element)[0] = snorm<16>(r);
			((unsigned short*)element)[1] = snorm<16>(g);
			((unsigned short*)element)[2] = snorm<16>(b);
			((unsigned short*)element)[3] = unorm<16>(a);
			break;
		case FORMAT_Q16W16V16U16:
			((unsigned short*)element)[0] = snorm<16>(r);
			((unsigned short*)element)[1] = snorm<16>(g);
			((unsigned short*)element)[2] = snorm<16>(b);
			((unsigned short*)element)[3] = snorm<16>(a);
			break;
		case FORMAT_R8G8B8:
			((unsigned char*)element)[0] = unorm<8>(b);
			((unsigned char*)element)[1] = unorm<8>(g);
			((unsigned char*)element)[2] = unorm<8>(r);
			break;
		case FORMAT_B8G8R8:
			((unsigned char*)element)[0] = unorm<8>(r);
			((unsigned char*)element)[1] = unorm<8>(g);
			((unsigned char*)element)[2] = unorm<8>(b);
			break;
		case FORMAT_R16F:
			*(half*)element = (half)r;
			break;
		case FORMAT_A16F:
			*(half*)element = (half)a;
			break;
		case FORMAT_G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			break;
		case FORMAT_X16B16G16R16F_UNSIGNED:
			r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
			// Fall through to FORMAT_X16B16G16R16F.
		case FORMAT_X16B16G16R16F:
			((half*)element)[3] = 1.0f;
			// Fall through to FORMAT_B16G16R16F.
		case FORMAT_B16G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			((half*)element)[2] = (half)b;
			break;
		case FORMAT_A16B16G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			((half*)element)[2] = (half)b;
			((half*)element)[3] = (half)a;
			break;
		case FORMAT_A32F:
			*(float*)element = a;
			break;
		case FORMAT_R32F:
			*(float*)element = r;
			break;
		case FORMAT_G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			break;
		case FORMAT_X32B32G32R32F_UNSIGNED:
			r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
			// Fall through to FORMAT_X32B32G32R32F.
		case FORMAT_X32B32G32R32F:
			((float*)element)[3] = 1.0f;
			// Fall through to FORMAT_B32G32R32F.
		case FORMAT_B32G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			((float*)element)[2] = b;
			break;
		case FORMAT_A32B32G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			((float*)element)[2] = b;
			((float*)element)[3] = a;
			break;
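		// The depth formats below store the depth value from the red channel; the
		// *_COMPLEMENTARY variants store 1 - depth instead (presumably selected via the
		// complementaryDepthBuffer flag declared above), a common trick to keep more
		// floating-point precision far from the camera.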
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
			*((float*)element) = r;
			break;
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
			*((float*)element) = 1 - r;
			break;
		case FORMAT_S8:
			*((unsigned char*)element) = unorm<8>(r);
			break;
		case FORMAT_L8:
			*(unsigned char*)element = unorm<8>(r);
			break;
		case FORMAT_A4L4:
			*(unsigned char*)element = (unorm<4>(a) << 4) | (unorm<4>(r) << 0);
			break;
		case FORMAT_L16:
			*(unsigned short*)element = unorm<16>(r);
			break;
		case FORMAT_A8L8:
			*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_L16F:
			*(half*)element = (half)r;
			break;
		case FORMAT_A16L16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)a;
			break;
		case FORMAT_L32F:
			*(float*)element = r;
			break;
		case FORMAT_A32L32F:
			((float*)element)[0] = r;
			((float*)element)[1] = a;
			break;
		default:
			ASSERT(false);
		}
	}

	Color<float> Surface::Buffer::read(int x, int y, int z) const
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));
		ASSERT((z >= 0) && (z < depth));

		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;

		return read(element);
	}

	Color<float> Surface::Buffer::read(int x, int y) const
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));

		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;

		return read(element);
	}

	inline Color<float> Surface::Buffer::read(void *element) const
	{
		float r = 0.0f;
		float g = 0.0f;
		float b = 0.0f;
		float a = 1.0f;

		switch(format)
		{
		case FORMAT_P8:
			{
				ASSERT(palette);

				unsigned int abgr = palette[*(unsigned char*)element];

				r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
				g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
			}
			break;
		case FORMAT_A8P8:
			{
				ASSERT(palette);

				unsigned int bgr = palette[((unsigned char*)element)[0]];

				r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
				g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			}
			break;
		case FORMAT_A8:
			r = 0;
			g = 0;
			b = 0;
			a = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_R8_SNORM:
			r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
			break;
		case FORMAT_R8:
			r = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_R8I:
			r = *(signed char*)element;
			break;
		case FORMAT_R8UI:
			r = *(unsigned char*)element;
			break;
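		// Note: for the plain integer formats (the *I/*UI cases) the raw integer value is
		// returned in the float channels unscaled; normalized formats are scaled to [0, 1]
		// (or roughly [-1, 1] for the _SNORM variants) instead.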
		case FORMAT_R3G3B2:
			{
				unsigned char rgb = *(unsigned char*)element;

				r = (rgb & 0xE0) * (1.0f / 0xE0);
				g = (rgb & 0x1C) * (1.0f / 0x1C);
				b = (rgb & 0x03) * (1.0f / 0x03);
			}
			break;
		case FORMAT_A8R3G3B2:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0xFF00) * (1.0f / 0xFF00);
				r = (argb & 0x00E0) * (1.0f / 0x00E0);
				g = (argb & 0x001C) * (1.0f / 0x001C);
				b = (argb & 0x0003) * (1.0f / 0x0003);
			}
			break;
		case FORMAT_X4R4G4B4:
			{
				unsigned short rgb = *(unsigned short*)element;

				r = (rgb & 0x0F00) * (1.0f / 0x0F00);
				g = (rgb & 0x00F0) * (1.0f / 0x00F0);
				b = (rgb & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_A4R4G4B4:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0xF000) * (1.0f / 0xF000);
				r = (argb & 0x0F00) * (1.0f / 0x0F00);
				g = (argb & 0x00F0) * (1.0f / 0x00F0);
				b = (argb & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_R4G4B4A4:
			{
				unsigned short rgba = *(unsigned short*)element;

				r = (rgba & 0xF000) * (1.0f / 0xF000);
				g = (rgba & 0x0F00) * (1.0f / 0x0F00);
				b = (rgba & 0x00F0) * (1.0f / 0x00F0);
				a = (rgba & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_R5G6B5:
			{
				unsigned short rgb = *(unsigned short*)element;

				r = (rgb & 0xF800) * (1.0f / 0xF800);
				g = (rgb & 0x07E0) * (1.0f / 0x07E0);
				b = (rgb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_A1R5G5B5:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0x8000) * (1.0f / 0x8000);
				r = (argb & 0x7C00) * (1.0f / 0x7C00);
				g = (argb & 0x03E0) * (1.0f / 0x03E0);
				b = (argb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_R5G5B5A1:
			{
				unsigned short rgba = *(unsigned short*)element;

				r = (rgba & 0xF800) * (1.0f / 0xF800);
				g = (rgba & 0x07C0) * (1.0f / 0x07C0);
				b = (rgba & 0x003E) * (1.0f / 0x003E);
				a = (rgba & 0x0001) * (1.0f / 0x0001);
			}
			break;
		case FORMAT_X1R5G5B5:
			{
				unsigned short xrgb = *(unsigned short*)element;

				r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
				g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
				b = (xrgb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_A8R8G8B8:
			{
				unsigned int argb = *(unsigned int*)element;

				a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
				r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_X8R8G8B8:
			{
				unsigned int xrgb = *(unsigned int*)element;

				r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_A8B8G8R8_SNORM:
			{
				signed char* abgr = (signed char*)element;

				r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
				g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
				b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
				a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
			}
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_SRGB8_A8:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
				b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_A8B8G8R8I:
			{
				signed char* abgr = (signed char*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
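		// Many of the unorm decodes above and below use (value & mask) * (1.0f / mask):
		// the mask isolates the channel in place and the reciprocal of the mask maps a
		// fully set field to exactly 1.0f, so no shift is needed.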
		case FORMAT_A8B8G8R8UI:
			{
				unsigned char* abgr = (unsigned char*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_X8B8G8R8_SNORM:
			{
				signed char* bgr = (signed char*)element;

				r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
				g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
				b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
			}
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_SRGB8_X8:
			{
				unsigned int xbgr = *(unsigned int*)element;

				b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_X8B8G8R8I:
			{
				signed char* bgr = (signed char*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_X8B8G8R8UI:
			{
				unsigned char* bgr = (unsigned char*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_G8R8_SNORM:
			{
				signed char* gr = (signed char*)element;

				r = max(gr[0] * (1.0f / 0x7F), -1.0f);
				g = max(gr[1] * (1.0f / 0x7F), -1.0f);
			}
			break;
		case FORMAT_G8R8:
			{
				unsigned short gr = *(unsigned short*)element;

				g = (gr & 0xFF00) * (1.0f / 0xFF00);
				r = (gr & 0x00FF) * (1.0f / 0x00FF);
			}
			break;
		case FORMAT_G8R8I:
			{
				signed char* gr = (signed char*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_G8R8UI:
			{
				unsigned char* gr = (unsigned char*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_R16I:
			r = *((short*)element);
			break;
		case FORMAT_R16UI:
			r = *((unsigned short*)element);
			break;
		case FORMAT_G16R16I:
			{
				short* gr = (short*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_G16R16:
			{
				unsigned int gr = *(unsigned int*)element;

				g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
				r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
			}
			break;
		case FORMAT_G16R16UI:
			{
				unsigned short* gr = (unsigned short*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_A2R10G10B10:
			{
				unsigned int argb = *(unsigned int*)element;

				a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
				r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
				g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
				b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
			}
			break;
		case FORMAT_A2B10G10R10:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
				b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
				g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
				r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
			}
			break;
		case FORMAT_A2B10G10R10UI:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = static_cast<float>((abgr & 0xC0000000) >> 30);
				b = static_cast<float>((abgr & 0x3FF00000) >> 20);
				g = static_cast<float>((abgr & 0x000FFC00) >> 10);
				r = static_cast<float>(abgr & 0x000003FF);
			}
			break;
		case FORMAT_A16B16G16R16I:
			{
				short* abgr = (short*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_A16B16G16R16:
			r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
			g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
			b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
			a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
			break;
		case FORMAT_A16B16G16R16UI:
			{
				unsigned short* abgr = (unsigned short*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_X16B16G16R16I:
			{
				short* bgr = (short*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_X16B16G16R16UI:
			{
				unsigned short* bgr = (unsigned short*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_A32B32G32R32I:
			{
				int* abgr = (int*)element;

				r = static_cast<float>(abgr[0]);
				g = static_cast<float>(abgr[1]);
				b = static_cast<float>(abgr[2]);
				a = static_cast<float>(abgr[3]);
			}
			break;
		case FORMAT_A32B32G32R32UI:
			{
				unsigned int* abgr = (unsigned int*)element;

				r = static_cast<float>(abgr[0]);
				g = static_cast<float>(abgr[1]);
				b = static_cast<float>(abgr[2]);
				a = static_cast<float>(abgr[3]);
			}
			break;
		case FORMAT_X32B32G32R32I:
			{
				int* bgr = (int*)element;

				r = static_cast<float>(bgr[0]);
				g = static_cast<float>(bgr[1]);
				b = static_cast<float>(bgr[2]);
			}
			break;
		case FORMAT_X32B32G32R32UI:
			{
				unsigned int* bgr = (unsigned int*)element;

				r = static_cast<float>(bgr[0]);
				g = static_cast<float>(bgr[1]);
				b = static_cast<float>(bgr[2]);
			}
			break;
		case FORMAT_G32R32I:
			{
				int* gr = (int*)element;

				r = static_cast<float>(gr[0]);
				g = static_cast<float>(gr[1]);
			}
			break;
		case FORMAT_G32R32UI:
			{
				unsigned int* gr = (unsigned int*)element;

				r = static_cast<float>(gr[0]);
				g = static_cast<float>(gr[1]);
			}
			break;
		case FORMAT_R32I:
			r = static_cast<float>(*((int*)element));
			break;
		case FORMAT_R32UI:
			r = static_cast<float>(*((unsigned int*)element));
			break;
		case FORMAT_V8U8:
			{
				unsigned short vu = *(unsigned short*)element;

				r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
			}
			break;
		case FORMAT_L6V5U5:
			{
				unsigned short lvu = *(unsigned short*)element;

				r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
				g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
				b = (lvu & 0xFC00) * (1.0f / 0xFC00);
			}
			break;
		case FORMAT_Q8W8V8U8:
			{
				unsigned int qwvu = *(unsigned int*)element;

				r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
				b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
				a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
			}
			break;
		case FORMAT_X8L8V8U8:
			{
				unsigned int xlvu = *(unsigned int*)element;

				r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
				b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
			}
			break;
		case FORMAT_R8G8B8:
			r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
			g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			break;
		case FORMAT_B8G8R8:
			r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
			break;
		case FORMAT_V16U16:
			{
				unsigned int vu = *(unsigned int*)element;

				r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
				g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
			}
			break;
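		// The signed V/U fields of the bump-map formats (above and in the A2W10V10U10 case
		// below) are shifted into the sign bit of a 32-bit int before scaling, so
		// two's-complement sign extension comes for free; the 0x7F000000-style divisors
		// then map each field to roughly [-1, 1].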
		case FORMAT_A2W10V10U10:
			{
				unsigned int awvu = *(unsigned int*)element;

				r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
				g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
				b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
				a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
			}
			break;
		case FORMAT_A16W16V16U16:
			r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
			g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
			b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
			a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
			break;
		case FORMAT_Q16W16V16U16:
			r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
			g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
			b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
			a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
			break;
		case FORMAT_L8:
			r =
			g =
			b = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_A4L4:
			{
				unsigned char al = *(unsigned char*)element;

				r =
				g =
				b = (al & 0x0F) * (1.0f / 0x0F);
				a = (al & 0xF0) * (1.0f / 0xF0);
			}
			break;
		case FORMAT_L16:
			r =
			g =
			b = *(unsigned short*)element * (1.0f / 0xFFFF);
			break;
		case FORMAT_A8L8:
			r =
			g =
			b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			break;
		case FORMAT_L16F:
			r =
			g =
			b = *(half*)element;
			break;
		case FORMAT_A16L16F:
			r =
			g =
			b = ((half*)element)[0];
			a = ((half*)element)[1];
			break;
		case FORMAT_L32F:
			r =
			g =
			b = *(float*)element;
			break;
		case FORMAT_A32L32F:
			r =
			g =
			b = ((float*)element)[0];
			a = ((float*)element)[1];
			break;
		case FORMAT_A16F:
			a = *(half*)element;
			break;
		case FORMAT_R16F:
			r = *(half*)element;
			break;
		case FORMAT_G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			break;
		case FORMAT_X16B16G16R16F:
		case FORMAT_X16B16G16R16F_UNSIGNED:
		case FORMAT_B16G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			b = ((half*)element)[2];
			break;
		case FORMAT_A16B16G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			b = ((half*)element)[2];
			a = ((half*)element)[3];
			break;
		case FORMAT_A32F:
			a = *(float*)element;
			break;
		case FORMAT_R32F:
			r = *(float*)element;
			break;
		case FORMAT_G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			break;
		case FORMAT_X32B32G32R32F:
		case FORMAT_X32B32G32R32F_UNSIGNED:
		case FORMAT_B32G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			b = ((float*)element)[2];
			break;
		case FORMAT_A32B32G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			b = ((float*)element)[2];
			a = ((float*)element)[3];
			break;
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
			r = *(float*)element;
			g = r;
			b = r;
			a = r;
			break;
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
			r = 1.0f - *(float*)element;
			g = r;
			b = r;
			a = r;
			break;
		case FORMAT_S8:
			r = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		default:
			ASSERT(false);
		}

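		// sRGB formats store gamma-encoded values; sRGBtoLinear() is expected to apply the
		// standard EOTF (c / 12.92 below ~0.04045, ((c + 0.055) / 1.055)^2.4 above),
		// mirroring the linearToSRGB() call in write().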
		if(isSRGBformat(format))
		{
			r = sRGBtoLinear(r);
			g = sRGBtoLinear(g);
			b = sRGBtoLinear(b);
		}

		return Color<float>(r, g, b, a);
	}

	Color<float> Surface::Buffer::sample(float x, float y, float z) const
	{
		x -= 0.5f;
		y -= 0.5f;
		z -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		int z0 = clamp((int)z, 0, depth - 1);
		int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;

		Color<float> c000 = read(x0, y0, z0);
		Color<float> c100 = read(x1, y0, z0);
		Color<float> c010 = read(x0, y1, z0);
		Color<float> c110 = read(x1, y1, z0);
		Color<float> c001 = read(x0, y0, z1);
		Color<float> c101 = read(x1, y0, z1);
		Color<float> c011 = read(x0, y1, z1);
		Color<float> c111 = read(x1, y1, z1);

		float fx = x - x0;
		float fy = y - y0;
		float fz = z - z0;

		c000 *= (1 - fx) * (1 - fy) * (1 - fz);
		c100 *= fx * (1 - fy) * (1 - fz);
		c010 *= (1 - fx) * fy * (1 - fz);
		c110 *= fx * fy * (1 - fz);
		c001 *= (1 - fx) * (1 - fy) * fz;
		c101 *= fx * (1 - fy) * fz;
		c011 *= (1 - fx) * fy * fz;
		c111 *= fx * fy * fz;

		return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
	}

	Color<float> Surface::Buffer::sample(float x, float y, int layer) const
	{
		x -= 0.5f;
		y -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		Color<float> c00 = read(x0, y0, layer);
		Color<float> c10 = read(x1, y0, layer);
		Color<float> c01 = read(x0, y1, layer);
		Color<float> c11 = read(x1, y1, layer);

		float fx = x - x0;
		float fy = y - y0;

		c00 *= (1 - fx) * (1 - fy);
		c10 *= fx * (1 - fy);
		c01 *= (1 - fx) * fy;
		c11 *= fx * fy;

		return c00 + c10 + c01 + c11;
	}

	void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
	{
		this->lock = lock;

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
		case LOCK_UPDATE:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirty = true;
			break;
		default:
			ASSERT(false);
		}

		if(buffer)
		{
			x += border;
			y += border;

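			// For block-compressed formats the returned pointer addresses the 4x4 block
			// containing (x, y): 8 bytes per block for the 64-bit encodings (DXT1, ATI1,
			// ETC1, R11_EAC and the ETC2 RGB variants) and 16 bytes per block for the
			// 128-bit ones (DXT3/DXT5, ATI2, RG11_EAC, RGBA8_ETC2_EAC).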
			switch(format)
			{
			case FORMAT_DXT1:
			case FORMAT_ATI1:
			case FORMAT_ETC1:
			case FORMAT_R11_EAC:
			case FORMAT_SIGNED_R11_EAC:
			case FORMAT_RGB8_ETC2:
			case FORMAT_SRGB8_ETC2:
			case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
				return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RG11_EAC:
			case FORMAT_SIGNED_RG11_EAC:
			case FORMAT_RGBA8_ETC2_EAC:
			case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_DXT3:
			case FORMAT_DXT5:
			case FORMAT_ATI2:
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			default:
				return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
			}
		}

		return nullptr;
	}

	void Surface::Buffer::unlockRect()
	{
		lock = LOCK_UNLOCKED;
	}

	class SurfaceImplementation : public Surface
	{
	public:
		SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
			: Surface(width, height, depth, format, pixels, pitch, slice) {}

		SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
			: Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}

		~SurfaceImplementation() override {}

		void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
		{
			return Surface::lockInternal(x, y, z, lock, client);
		}

		void unlockInternal() override
		{
			Surface::unlockInternal();
		}
	};

	Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
	{
		return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
	}

	Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
	{
		return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
	}

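	// Illustrative usage of the factories above (an assumption for illustration, not part
	// of this file):
	//
	//   sw::Surface *surface = sw::Surface::create(width, height, 1, sw::FORMAT_A8R8G8B8, pixels, pitch, slice);
	//   void *data = surface->lockExternal(0, 0, 0, sw::LOCK_READWRITE, sw::PUBLIC);
	//   /* read or modify the client-format pixels through 'data' */
	//   surface->unlockExternal();
	//   delete surface;   // all locks must have been released first (see ~Surface below)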
	Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
	{
		resource = new Resource(0);
		hasParent = false;
		ownExternal = false;
		depth = max(1, depth);

		external.buffer = pixels;
		external.width = width;
		external.height = height;
		external.depth = depth;
		external.samples = 1;
		external.format = format;
		external.bytes = bytes(external.format);
		external.pitchB = pitch;
		external.pitchP = external.bytes ? pitch / external.bytes : 0;
		external.sliceB = slice;
		external.sliceP = external.bytes ? slice / external.bytes : 0;
		external.border = 0;
		external.lock = LOCK_UNLOCKED;
		external.dirty = true;

		internal.buffer = nullptr;
		internal.width = width;
		internal.height = height;
		internal.depth = depth;
		internal.samples = 1;
		internal.format = selectInternalFormat(format);
		internal.bytes = bytes(internal.format);
		internal.pitchB = pitchB(internal.width, 0, internal.format, false);
		internal.pitchP = pitchP(internal.width, 0, internal.format, false);
		internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
		internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
		internal.border = 0;
		internal.lock = LOCK_UNLOCKED;
		internal.dirty = false;

		stencil.buffer = nullptr;
		stencil.width = width;
		stencil.height = height;
		stencil.depth = depth;
		stencil.samples = 1;
		stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
		stencil.bytes = bytes(stencil.format);
		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
		stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
		stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
		stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
		stencil.border = 0;
		stencil.lock = LOCK_UNLOCKED;
		stencil.dirty = false;

		dirtyContents = true;
		paletteUsed = 0;
	}

	Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
	{
		resource = texture ? texture : new Resource(0);
		hasParent = texture != nullptr;
		ownExternal = true;
		depth = max(1, depth);
		samples = max(1, samples);

		external.buffer = nullptr;
		external.width = width;
		external.height = height;
		external.depth = depth;
		external.samples = (short)samples;
		external.format = format;
		external.bytes = bytes(external.format);
		external.pitchB = !pitchPprovided ? pitchB(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided * external.bytes;
		external.pitchP = !pitchPprovided ? pitchP(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided;
		external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
		external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
		external.border = 0;
		external.lock = LOCK_UNLOCKED;
		external.dirty = false;

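		// 'internal' describes the renderable copy of the data: selectInternalFormat() may
		// pick a different (typically wider) format than the client-visible 'external'
		// layout, and only the internal buffer carries the border texels. update() converts
		// between the two whenever one side is marked dirty.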
		internal.buffer = nullptr;
		internal.width = width;
		internal.height = height;
		internal.depth = depth;
		internal.samples = (short)samples;
		internal.format = selectInternalFormat(format);
		internal.bytes = bytes(internal.format);
		internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
		internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
		internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
		internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
		internal.border = (short)border;
		internal.lock = LOCK_UNLOCKED;
		internal.dirty = false;

		stencil.buffer = nullptr;
		stencil.width = width;
		stencil.height = height;
		stencil.depth = depth;
		stencil.samples = (short)samples;
		stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
		stencil.bytes = bytes(stencil.format);
		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
		stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
		stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
		stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
		stencil.border = 0;
		stencil.lock = LOCK_UNLOCKED;
		stencil.dirty = false;

		dirtyContents = true;
		paletteUsed = 0;
	}

	Surface::~Surface()
	{
		// sync() must be called before this destructor to ensure all locks have been released.
		// We can't call it here because the parent resource may already have been destroyed.
		ASSERT(isUnlocked());

		if(!hasParent)
		{
			resource->destruct();
		}

		if(ownExternal)
		{
			deallocate(external.buffer);
		}

		if(internal.buffer != external.buffer)
		{
			deallocate(internal.buffer);
		}

		deallocate(stencil.buffer);

		external.buffer = nullptr;
		internal.buffer = nullptr;
		stencil.buffer = nullptr;
	}

	void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
	{
		resource->lock(client);

		if(!external.buffer)
		{
			if(internal.buffer && identicalBuffers())
			{
				external.buffer = internal.buffer;
			}
			else
			{
				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
			}
		}

		if(internal.dirty)
		{
			if(lock != LOCK_DISCARD)
			{
				update(external, internal);
			}

			internal.dirty = false;
		}

		switch(lock)
		{
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyContents = true;
			break;
		default:
			ASSERT(false);
		}

		return external.lockRect(x, y, z, lock);
	}

	void Surface::unlockExternal()
	{
		external.unlockRect();

		resource->unlock();
	}

	void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
	{
		if(lock != LOCK_UNLOCKED)
		{
			resource->lock(client);
		}

		if(!internal.buffer)
		{
			if(external.buffer && identicalBuffers())
			{
				internal.buffer = external.buffer;
			}
			else
			{
				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
			}
		}

		// FIXME: WHQL requires conversion to lower external precision and back
		if(logPrecision >= WHQL)
		{
			if(internal.dirty && renderTarget && internal.format != external.format)
			{
				if(lock != LOCK_DISCARD)
				{
					switch(external.format)
					{
					case FORMAT_R3G3B2:
					case FORMAT_A8R3G3B2:
					case FORMAT_A1R5G5B5:
					case FORMAT_A2R10G10B10:
					case FORMAT_A2B10G10R10:
						lockExternal(0, 0, 0, LOCK_READWRITE, client);
						unlockExternal();
						break;
					default:
						// Difference passes WHQL
						break;
					}
				}
			}
		}

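		// Keep the two copies coherent: lockExternal() converts a dirty internal buffer back
		// to the external layout, and here a dirty external buffer (or a palette change for
		// the palette formats) is converted into the internal format before the lock is
		// granted, unless the caller discards the previous contents.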
		if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
		{
			if(lock != LOCK_DISCARD)
			{
				update(internal, external);
			}

			external.dirty = false;
			paletteUsed = Surface::paletteID;
		}

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyContents = true;
			break;
		default:
			ASSERT(false);
		}

		if(lock == LOCK_READONLY && client == PUBLIC)
		{
			resolve();
		}

		return internal.lockRect(x, y, z, lock);
	}

	void Surface::unlockInternal()
	{
		internal.unlockRect();

		resource->unlock();
	}

	void *Surface::lockStencil(int x, int y, int front, Accessor client)
	{
		resource->lock(client);

		if(stencil.format == FORMAT_NULL)
		{
			return nullptr;
		}

		if(!stencil.buffer)
		{
			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
		}

		return stencil.lockRect(x, y, front, LOCK_READWRITE);   // FIXME
	}

	void Surface::unlockStencil()
	{
		stencil.unlockRect();

		resource->unlock();
	}

	int Surface::bytes(Format format)
	{
		switch(format)
		{
		case FORMAT_NULL: return 0;
		case FORMAT_P8: return 1;
		case FORMAT_A8P8: return 2;
		case FORMAT_A8: return 1;
		case FORMAT_R8I: return 1;
		case FORMAT_R8: return 1;
		case FORMAT_R3G3B2: return 1;
		case FORMAT_R16I: return 2;
		case FORMAT_R16UI: return 2;
		case FORMAT_A8R3G3B2: return 2;
		case FORMAT_R5G6B5: return 2;
		case FORMAT_A1R5G5B5: return 2;
		case FORMAT_X1R5G5B5: return 2;
		case FORMAT_R5G5B5A1: return 2;
		case FORMAT_X4R4G4B4: return 2;
		case FORMAT_A4R4G4B4: return 2;
		case FORMAT_R4G4B4A4: return 2;
		case FORMAT_R8G8B8: return 3;
		case FORMAT_B8G8R8: return 3;
		case FORMAT_R32I: return 4;
		case FORMAT_R32UI: return 4;
		case FORMAT_X8R8G8B8: return 4;
	//	case FORMAT_X8G8R8B8Q: return 4;
		case FORMAT_A8R8G8B8: return 4;
	//	case FORMAT_A8G8R8B8Q: return 4;
		case FORMAT_X8B8G8R8I: return 4;
		case FORMAT_X8B8G8R8: return 4;
		case FORMAT_SRGB8_X8: return 4;
		case FORMAT_SRGB8_A8: return 4;
		case FORMAT_A8B8G8R8I: return 4;
		case FORMAT_R8UI: return 1;
		case FORMAT_G8R8UI: return 2;
		case FORMAT_X8B8G8R8UI: return 4;
		case FORMAT_A8B8G8R8UI: return 4;
		case FORMAT_A8B8G8R8: return 4;
		case FORMAT_R8_SNORM: return 1;
		case FORMAT_G8R8_SNORM: return 2;
		case FORMAT_X8B8G8R8_SNORM: return 4;
		case FORMAT_A8B8G8R8_SNORM: return 4;
		case FORMAT_A2R10G10B10: return 4;
		case FORMAT_A2B10G10R10: return 4;
		case FORMAT_A2B10G10R10UI: return 4;
		case FORMAT_G8R8I: return 2;
		case FORMAT_G8R8: return 2;
		case FORMAT_G16R16I: return 4;
		case FORMAT_G16R16UI: return 4;
		case FORMAT_G16R16: return 4;
		case FORMAT_G32R32I: return 8;
		case FORMAT_G32R32UI: return 8;
		case FORMAT_X16B16G16R16I: return 8;
		case FORMAT_X16B16G16R16UI: return 8;
		case FORMAT_A16B16G16R16I: return 8;
		case FORMAT_A16B16G16R16UI: return 8;
		case FORMAT_A16B16G16R16: return 8;
		case FORMAT_X32B32G32R32I: return 16;
		case FORMAT_X32B32G32R32UI: return 16;
		case FORMAT_A32B32G32R32I: return 16;
		case FORMAT_A32B32G32R32UI: return 16;
		// Compressed formats
		case FORMAT_DXT1: return 2;   // Column of four pixels
		case FORMAT_DXT3: return 4;   // Column of four pixels
		case FORMAT_DXT5: return 4;   // Column of four pixels
		case FORMAT_ATI1: return 2;   // Column of four pixels
		case FORMAT_ATI2: return 4;   // Column of four pixels
		case FORMAT_ETC1: return 2;   // Column of four pixels
		case FORMAT_R11_EAC: return 2;
		case FORMAT_SIGNED_R11_EAC: return 2;
		case FORMAT_RG11_EAC: return 4;
		case FORMAT_SIGNED_RG11_EAC: return 4;
		case FORMAT_RGB8_ETC2: return 2;
		case FORMAT_SRGB8_ETC2: return 2;
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_RGBA8_ETC2_EAC: return 4;
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
		// Bumpmap formats
		case FORMAT_V8U8: return 2;
		case FORMAT_L6V5U5: return 2;
		case FORMAT_Q8W8V8U8: return 4;
		case FORMAT_X8L8V8U8: return 4;
		case FORMAT_A2W10V10U10: return 4;
		case FORMAT_V16U16: return 4;
		case FORMAT_A16W16V16U16: return 8;
		case FORMAT_Q16W16V16U16: return 8;
		// Luminance formats
		case FORMAT_L8: return 1;
		case FORMAT_A4L4: return 1;
		case FORMAT_L16: return 2;
		case FORMAT_A8L8: return 2;
		case FORMAT_L16F: return 2;
		case FORMAT_A16L16F: return 4;
		case FORMAT_L32F: return 4;
		case FORMAT_A32L32F: return 8;
		// Floating-point formats
		case FORMAT_A16F: return 2;
		case FORMAT_R16F: return 2;
		case FORMAT_G16R16F: return 4;
		case FORMAT_B16G16R16F: return 6;
		case FORMAT_X16B16G16R16F: return 8;
		case FORMAT_A16B16G16R16F: return 8;
		case FORMAT_X16B16G16R16F_UNSIGNED: return 8;
		case FORMAT_A32F: return 4;
		case FORMAT_R32F: return 4;
		case FORMAT_G32R32F: return 8;
		case FORMAT_B32G32R32F: return 12;
		case FORMAT_X32B32G32R32F: return 16;
		case FORMAT_A32B32G32R32F: return 16;
		case FORMAT_X32B32G32R32F_UNSIGNED: return 16;
		// Depth/stencil formats
		case FORMAT_D16: return 2;
		case FORMAT_D32: return 4;
		case FORMAT_D24X8: return 4;
		case FORMAT_D24S8: return 4;
		case FORMAT_D24FS8: return 4;
		case FORMAT_D32F: return 4;
		case FORMAT_D32FS8: return 4;
		case FORMAT_D32F_COMPLEMENTARY: return 4;
		case FORMAT_D32FS8_COMPLEMENTARY: return 4;
		case FORMAT_D32F_LOCKABLE: return 4;
		case FORMAT_D32FS8_TEXTURE: return 4;
		case FORMAT_D32F_SHADOW: return 4;
		case FORMAT_D32FS8_SHADOW: return 4;
		case FORMAT_DF24S8: return 4;
		case FORMAT_DF16S8: return 2;
		case FORMAT_INTZ: return 4;
		case FORMAT_S8: return 1;
		case FORMAT_YV12_BT601: return 1;   // Y plane only
		case FORMAT_YV12_BT709: return 1;   // Y plane only
		case FORMAT_YV12_JFIF: return 1;   // Y plane only
		default:
			ASSERT(false);
		}

		return 0;
	}

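	// Worked example (illustration only): for a 70-texel-wide DXT1 level with no border
	// that is not a render target, pitchB() below yields 8 * ((70 + 3) / 4) = 144 bytes,
	// i.e. 18 4x4 blocks of 8 bytes per block row.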
	int Surface::pitchB(int width, int border, Format format, bool target)
	{
		width += 2 * border;

		// Render targets require 2x2 quads
		if(target || isDepth(format) || isStencil(format))
		{
			width = align<2>(width);
		}

		switch(format)
		{
		case FORMAT_DXT1:
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			return 8 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per 4 rows
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_DXT3:
		case FORMAT_DXT5:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_ATI1:
			return 2 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per row
		case FORMAT_ATI2:
			return 4 * ((width + 3) / 4);    // 128 bit per 4x4 block, computed per row
		case FORMAT_YV12_BT601:
		case FORMAT_YV12_BT709:
		case FORMAT_YV12_JFIF:
			return align<16>(width);
		default:
			return bytes(format) * width;
		}
	}

	int Surface::pitchP(int width, int border, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ? pitchB(width, border, format, target) / B : 0;
	}

	int Surface::sliceB(int width, int height, int border, Format format, bool target)
	{
		height += 2 * border;

		// Render targets require 2x2 quads
		if(target || isDepth(format) || isStencil(format))
		{
			height = align<2>(height);
		}

		switch(format)
		{
		case FORMAT_DXT1:
		case FORMAT_DXT3:
		case FORMAT_DXT5:
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
			return pitchB(width, border, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
		case FORMAT_ATI1:
		case FORMAT_ATI2:
			return pitchB(width, border, format, target) * align<4>(height);   // Pitch computed per row
		default:
			return pitchB(width, border, format, target) * height;   // Pitch computed per row
		}
	}

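	// sliceP() mirrors sliceB() but expresses the slice size in elements rather than bytes;
	// the B > 0 check guards FORMAT_NULL, whose element size is zero.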
	int Surface::sliceP(int width, int height, int border, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
	}

	void Surface::update(Buffer &destination, Buffer &source)
	{
	//	ASSERT(source.lock != LOCK_UNLOCKED);
	//	ASSERT(destination.lock != LOCK_UNLOCKED);

		if(destination.buffer != source.buffer)
		{
			ASSERT(source.dirty && !destination.dirty);

			switch(source.format)
			{
			case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break;   // FIXME: Check destination format
			case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break;   // FIXME: Check destination format
			case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break;   // FIXME: Check destination format
			case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break;   // FIXME: Check destination format
			case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break;   // FIXME: Check destination format
			case FORMAT_P8: decodeP8(destination, source); break;   // FIXME: Check destination format
			case FORMAT_DXT1: decodeDXT1(destination, source); break;   // FIXME: Check destination format
			case FORMAT_DXT3: decodeDXT3(destination, source); break;   // FIXME: Check destination format
			case FORMAT_DXT5: decodeDXT5(destination, source); break;   // FIXME: Check destination format
			case FORMAT_ATI1: decodeATI1(destination, source); break;   // FIXME: Check destination format
			case FORMAT_ATI2: decodeATI2(destination, source); break;   // FIXME: Check destination format
			case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break;   // FIXME: Check destination format
			case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break;   // FIXME: Check destination format
			case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break;   // FIXME: Check destination format
			case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break;   // FIXME: Check destination format
			case FORMAT_ETC1:
			case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break;   // FIXME: Check destination format
			case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break;   // FIXME: Check destination format
			case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break;   // FIXME: Check destination format
			case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break;   // FIXME: Check destination format
			case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break;   // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break;   // FIXME: Check destination format
			default: genericUpdate(destination, source); break;
			}
		}
	}

	void Surface::genericUpdate(Buffer &destination, Buffer &source)
	{
		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);

		int depth = min(destination.depth, source.depth);
		int height = min(destination.height, source.height);
		int width = min(destination.width, source.width);
		int rowBytes = width * source.bytes;

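		// Rows are copied with memcpy() when the two buffers share a format; otherwise every
		// texel is round-tripped through Color<float> via the generic read()/write() above.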
y++) 1798 { 1799 if(source.format == destination.format) 1800 { 1801 memcpy(destinationRow, sourceRow, rowBytes); 1802 } 1803 else 1804 { 1805 unsigned char *sourceElement = sourceRow; 1806 unsigned char *destinationElement = destinationRow; 1807 1808 for(int x = 0; x < width; x++) 1809 { 1810 Color<float> color = source.read(sourceElement); 1811 destination.write(destinationElement, color); 1812 1813 sourceElement += source.bytes; 1814 destinationElement += destination.bytes; 1815 } 1816 } 1817 1818 sourceRow += source.pitchB; 1819 destinationRow += destination.pitchB; 1820 } 1821 1822 sourceSlice += source.sliceB; 1823 destinationSlice += destination.sliceB; 1824 } 1825 1826 source.unlockRect(); 1827 destination.unlockRect(); 1828 } 1829 decodeR8G8B8(Buffer & destination,Buffer & source)1830 void Surface::decodeR8G8B8(Buffer &destination, Buffer &source) 1831 { 1832 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1833 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1834 1835 int depth = min(destination.depth, source.depth); 1836 int height = min(destination.height, source.height); 1837 int width = min(destination.width, source.width); 1838 1839 for(int z = 0; z < depth; z++) 1840 { 1841 unsigned char *sourceRow = sourceSlice; 1842 unsigned char *destinationRow = destinationSlice; 1843 1844 for(int y = 0; y < height; y++) 1845 { 1846 unsigned char *sourceElement = sourceRow; 1847 unsigned char *destinationElement = destinationRow; 1848 1849 for(int x = 0; x < width; x++) 1850 { 1851 unsigned int b = sourceElement[0]; 1852 unsigned int g = sourceElement[1]; 1853 unsigned int r = sourceElement[2]; 1854 1855 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0); 1856 1857 sourceElement += source.bytes; 1858 destinationElement += destination.bytes; 1859 } 1860 1861 sourceRow += source.pitchB; 1862 destinationRow += destination.pitchB; 1863 } 1864 1865 sourceSlice += source.sliceB; 1866 destinationSlice += destination.sliceB; 1867 } 1868 1869 source.unlockRect(); 1870 destination.unlockRect(); 1871 } 1872 decodeX1R5G5B5(Buffer & destination,Buffer & source)1873 void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source) 1874 { 1875 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1876 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1877 1878 int depth = min(destination.depth, source.depth); 1879 int height = min(destination.height, source.height); 1880 int width = min(destination.width, source.width); 1881 1882 for(int z = 0; z < depth; z++) 1883 { 1884 unsigned char *sourceRow = sourceSlice; 1885 unsigned char *destinationRow = destinationSlice; 1886 1887 for(int y = 0; y < height; y++) 1888 { 1889 unsigned char *sourceElement = sourceRow; 1890 unsigned char *destinationElement = destinationRow; 1891 1892 for(int x = 0; x < width; x++) 1893 { 1894 unsigned int xrgb = *(unsigned short*)sourceElement; 1895 1896 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 1897 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1898 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8); 1899 1900 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 1901 1902 sourceElement += source.bytes; 1903 destinationElement += destination.bytes; 1904 } 1905 1906 sourceRow += source.pitchB; 1907 destinationRow += destination.pitchB; 1908 } 1909 1910 
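			// The multiplications above expand the 5-bit channels to 8 bits in fixed point:
			// 134771/2^14, 16846/2^11 and 2106/2^8 all approximate 255/31 (~8.2258), and the
			// added constants (0x800000, 0x8000, 0x80) provide +0.5 rounding before the shift/mask.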
sourceSlice += source.sliceB; 1911 destinationSlice += destination.sliceB; 1912 } 1913 1914 source.unlockRect(); 1915 destination.unlockRect(); 1916 } 1917 decodeA1R5G5B5(Buffer & destination,Buffer & source)1918 void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source) 1919 { 1920 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1921 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1922 1923 int depth = min(destination.depth, source.depth); 1924 int height = min(destination.height, source.height); 1925 int width = min(destination.width, source.width); 1926 1927 for(int z = 0; z < depth; z++) 1928 { 1929 unsigned char *sourceRow = sourceSlice; 1930 unsigned char *destinationRow = destinationSlice; 1931 1932 for(int y = 0; y < height; y++) 1933 { 1934 unsigned char *sourceElement = sourceRow; 1935 unsigned char *destinationElement = destinationRow; 1936 1937 for(int x = 0; x < width; x++) 1938 { 1939 unsigned int argb = *(unsigned short*)sourceElement; 1940 1941 unsigned int a = (argb & 0x8000) * 130560; 1942 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 1943 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1944 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8); 1945 1946 *(unsigned int*)destinationElement = a | r | g | b; 1947 1948 sourceElement += source.bytes; 1949 destinationElement += destination.bytes; 1950 } 1951 1952 sourceRow += source.pitchB; 1953 destinationRow += destination.pitchB; 1954 } 1955 1956 sourceSlice += source.sliceB; 1957 destinationSlice += destination.sliceB; 1958 } 1959 1960 source.unlockRect(); 1961 destination.unlockRect(); 1962 } 1963 decodeX4R4G4B4(Buffer & destination,Buffer & source)1964 void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source) 1965 { 1966 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1967 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1968 1969 int depth = min(destination.depth, source.depth); 1970 int height = min(destination.height, source.height); 1971 int width = min(destination.width, source.width); 1972 1973 for(int z = 0; z < depth; z++) 1974 { 1975 unsigned char *sourceRow = sourceSlice; 1976 unsigned char *destinationRow = destinationSlice; 1977 1978 for(int y = 0; y < height; y++) 1979 { 1980 unsigned char *sourceElement = sourceRow; 1981 unsigned char *destinationElement = destinationRow; 1982 1983 for(int x = 0; x < width; x++) 1984 { 1985 unsigned int xrgb = *(unsigned short*)sourceElement; 1986 1987 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000; 1988 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00; 1989 unsigned int b = (xrgb & 0x000F) * 0x00000011; 1990 1991 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 1992 1993 sourceElement += source.bytes; 1994 destinationElement += destination.bytes; 1995 } 1996 1997 sourceRow += source.pitchB; 1998 destinationRow += destination.pitchB; 1999 } 2000 2001 sourceSlice += source.sliceB; 2002 destinationSlice += destination.sliceB; 2003 } 2004 2005 source.unlockRect(); 2006 destination.unlockRect(); 2007 } 2008 decodeA4R4G4B4(Buffer & destination,Buffer & source)2009 void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source) 2010 { 2011 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2012 unsigned char *destinationSlice = (unsigned 
char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2013 2014 int depth = min(destination.depth, source.depth); 2015 int height = min(destination.height, source.height); 2016 int width = min(destination.width, source.width); 2017 2018 for(int z = 0; z < depth; z++) 2019 { 2020 unsigned char *sourceRow = sourceSlice; 2021 unsigned char *destinationRow = destinationSlice; 2022 2023 for(int y = 0; y < height; y++) 2024 { 2025 unsigned char *sourceElement = sourceRow; 2026 unsigned char *destinationElement = destinationRow; 2027 2028 for(int x = 0; x < width; x++) 2029 { 2030 unsigned int argb = *(unsigned short*)sourceElement; 2031 2032 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000; 2033 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000; 2034 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00; 2035 unsigned int b = (argb & 0x000F) * 0x00000011; 2036 2037 *(unsigned int*)destinationElement = a | r | g | b; 2038 2039 sourceElement += source.bytes; 2040 destinationElement += destination.bytes; 2041 } 2042 2043 sourceRow += source.pitchB; 2044 destinationRow += destination.pitchB; 2045 } 2046 2047 sourceSlice += source.sliceB; 2048 destinationSlice += destination.sliceB; 2049 } 2050 2051 source.unlockRect(); 2052 destination.unlockRect(); 2053 } 2054 decodeP8(Buffer & destination,Buffer & source)2055 void Surface::decodeP8(Buffer &destination, Buffer &source) 2056 { 2057 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2058 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2059 2060 int depth = min(destination.depth, source.depth); 2061 int height = min(destination.height, source.height); 2062 int width = min(destination.width, source.width); 2063 2064 for(int z = 0; z < depth; z++) 2065 { 2066 unsigned char *sourceRow = sourceSlice; 2067 unsigned char *destinationRow = destinationSlice; 2068 2069 for(int y = 0; y < height; y++) 2070 { 2071 unsigned char *sourceElement = sourceRow; 2072 unsigned char *destinationElement = destinationRow; 2073 2074 for(int x = 0; x < width; x++) 2075 { 2076 unsigned int abgr = palette[*(unsigned char*)sourceElement]; 2077 2078 unsigned int r = (abgr & 0x000000FF) << 16; 2079 unsigned int g = (abgr & 0x0000FF00) << 0; 2080 unsigned int b = (abgr & 0x00FF0000) >> 16; 2081 unsigned int a = (abgr & 0xFF000000) >> 0; 2082 2083 *(unsigned int*)destinationElement = a | r | g | b; 2084 2085 sourceElement += source.bytes; 2086 destinationElement += destination.bytes; 2087 } 2088 2089 sourceRow += source.pitchB; 2090 destinationRow += destination.pitchB; 2091 } 2092 2093 sourceSlice += source.sliceB; 2094 destinationSlice += destination.sliceB; 2095 } 2096 2097 source.unlockRect(); 2098 destination.unlockRect(); 2099 } 2100 decodeDXT1(Buffer & internal,Buffer & external)2101 void Surface::decodeDXT1(Buffer &internal, Buffer &external) 2102 { 2103 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2104 const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY); 2105 2106 for(int z = 0; z < external.depth; z++) 2107 { 2108 unsigned int *dest = destSlice; 2109 2110 for(int y = 0; y < external.height; y += 4) 2111 { 2112 for(int x = 0; x < external.width; x += 4) 2113 { 2114 Color<byte> c[4]; 2115 2116 c[0] = source->c0; 2117 c[1] = source->c1; 2118 2119 if(source->c0 > source->c1) // No transparency 2120 { 2121 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2122 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 
3); 2123 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2124 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2125 c[2].a = 0xFF; 2126 2127 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2128 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2129 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2130 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2131 c[3].a = 0xFF; 2132 } 2133 else // c3 transparent 2134 { 2135 // c2 = 1 / 2 * c0 + 1 / 2 * c1 2136 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2); 2137 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2); 2138 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2); 2139 c[2].a = 0xFF; 2140 2141 c[3].r = 0; 2142 c[3].g = 0; 2143 c[3].b = 0; 2144 c[3].a = 0; 2145 } 2146 2147 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2148 { 2149 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2150 { 2151 dest[(x + i) + (y + j) * internal.pitchP] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4]; 2152 } 2153 } 2154 2155 source++; 2156 } 2157 } 2158 2159 (byte*&)destSlice += internal.sliceB; 2160 } 2161 2162 external.unlockRect(); 2163 internal.unlockRect(); 2164 } 2165 decodeDXT3(Buffer & internal,Buffer & external)2166 void Surface::decodeDXT3(Buffer &internal, Buffer &external) 2167 { 2168 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2169 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY); 2170 2171 for(int z = 0; z < external.depth; z++) 2172 { 2173 unsigned int *dest = destSlice; 2174 2175 for(int y = 0; y < external.height; y += 4) 2176 { 2177 for(int x = 0; x < external.width; x += 4) 2178 { 2179 Color<byte> c[4]; 2180 2181 c[0] = source->c0; 2182 c[1] = source->c1; 2183 2184 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2185 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2186 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2187 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2188 2189 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2190 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2191 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2192 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2193 2194 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2195 { 2196 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2197 { 2198 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F; 2199 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24)); 2200 2201 dest[(x + i) + (y + j) * internal.pitchP] = color; 2202 } 2203 } 2204 2205 source++; 2206 } 2207 } 2208 2209 (byte*&)destSlice += internal.sliceB; 2210 } 2211 2212 external.unlockRect(); 2213 internal.unlockRect(); 2214 } 2215 decodeDXT5(Buffer & internal,Buffer & external)2216 void Surface::decodeDXT5(Buffer &internal, Buffer &external) 2217 { 2218 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2219 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY); 2220 2221 for(int z = 0; z < external.depth; z++) 2222 { 2223 unsigned int *dest = destSlice; 2224 2225 for(int y = 0; y < external.height; y += 4) 2226 { 2227 for(int x = 0; x < external.width; x += 4) 2228 { 2229 Color<byte> c[4]; 2230 2231 c[0] = source->c0; 2232 c[1] = source->c1; 2233 2234 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2235 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2236 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 
2237 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2238 2239 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2240 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2241 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2242 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2243 2244 byte a[8]; 2245 2246 a[0] = source->a0; 2247 a[1] = source->a1; 2248 2249 if(a[0] > a[1]) 2250 { 2251 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7); 2252 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7); 2253 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7); 2254 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7); 2255 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7); 2256 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7); 2257 } 2258 else 2259 { 2260 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5); 2261 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5); 2262 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5); 2263 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5); 2264 a[6] = 0; 2265 a[7] = 0xFF; 2266 } 2267 2268 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2269 { 2270 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2271 { 2272 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24; 2273 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha; 2274 2275 dest[(x + i) + (y + j) * internal.pitchP] = color; 2276 } 2277 } 2278 2279 source++; 2280 } 2281 } 2282 2283 (byte*&)destSlice += internal.sliceB; 2284 } 2285 2286 external.unlockRect(); 2287 internal.unlockRect(); 2288 } 2289 decodeATI1(Buffer & internal,Buffer & external)2290 void Surface::decodeATI1(Buffer &internal, Buffer &external) 2291 { 2292 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2293 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY); 2294 2295 for(int z = 0; z < external.depth; z++) 2296 { 2297 byte *dest = destSlice; 2298 2299 for(int y = 0; y < external.height; y += 4) 2300 { 2301 for(int x = 0; x < external.width; x += 4) 2302 { 2303 byte r[8]; 2304 2305 r[0] = source->r0; 2306 r[1] = source->r1; 2307 2308 if(r[0] > r[1]) 2309 { 2310 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7); 2311 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7); 2312 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7); 2313 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7); 2314 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7); 2315 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7); 2316 } 2317 else 2318 { 2319 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5); 2320 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5); 2321 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5); 2322 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5); 2323 r[6] = 0; 2324 r[7] = 0xFF; 2325 } 2326 2327 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2328 { 2329 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2330 { 2331 dest[(x + i) + (y + j) * internal.pitchP] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8]; 2332 } 2333 } 2334 2335 source++; 2336 } 2337 } 2338 2339 destSlice += internal.sliceB; 2340 } 2341 2342 external.unlockRect(); 2343 internal.unlockRect(); 2344 } 2345 decodeATI2(Buffer & internal,Buffer & external)2346 void Surface::decodeATI2(Buffer &internal, Buffer &external) 2347 { 2348 word *destSlice = (word*)internal.lockRect(0, 0, 0, 
LOCK_UPDATE); 2349 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY); 2350 2351 for(int z = 0; z < external.depth; z++) 2352 { 2353 word *dest = destSlice; 2354 2355 for(int y = 0; y < external.height; y += 4) 2356 { 2357 for(int x = 0; x < external.width; x += 4) 2358 { 2359 byte X[8]; 2360 2361 X[0] = source->x0; 2362 X[1] = source->x1; 2363 2364 if(X[0] > X[1]) 2365 { 2366 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7); 2367 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7); 2368 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7); 2369 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7); 2370 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7); 2371 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7); 2372 } 2373 else 2374 { 2375 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5); 2376 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5); 2377 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5); 2378 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5); 2379 X[6] = 0; 2380 X[7] = 0xFF; 2381 } 2382 2383 byte Y[8]; 2384 2385 Y[0] = source->y0; 2386 Y[1] = source->y1; 2387 2388 if(Y[0] > Y[1]) 2389 { 2390 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7); 2391 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7); 2392 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7); 2393 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7); 2394 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7); 2395 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7); 2396 } 2397 else 2398 { 2399 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5); 2400 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5); 2401 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5); 2402 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5); 2403 Y[6] = 0; 2404 Y[7] = 0xFF; 2405 } 2406 2407 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2408 { 2409 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2410 { 2411 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8]; 2412 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8]; 2413 2414 dest[(x + i) + (y + j) * internal.pitchP] = (g << 8) + r; 2415 } 2416 } 2417 2418 source++; 2419 } 2420 } 2421 2422 (byte*&)destSlice += internal.sliceB; 2423 } 2424 2425 external.unlockRect(); 2426 internal.unlockRect(); 2427 } 2428 decodeETC2(Buffer & internal,Buffer & external,int nbAlphaBits,bool isSRGB)2429 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB) 2430 { 2431 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2432 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? 
ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB)); 2433 external.unlockRect(); 2434 internal.unlockRect(); 2435 2436 if(isSRGB) 2437 { 2438 static byte sRGBtoLinearTable[256]; 2439 static bool sRGBtoLinearTableDirty = true; 2440 if(sRGBtoLinearTableDirty) 2441 { 2442 for(int i = 0; i < 256; i++) 2443 { 2444 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f); 2445 } 2446 sRGBtoLinearTableDirty = false; 2447 } 2448 2449 // Perform sRGB conversion in place after decoding 2450 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE); 2451 for(int y = 0; y < internal.height; y++) 2452 { 2453 byte *srcRow = src + y * internal.pitchB; 2454 for(int x = 0; x < internal.width; x++) 2455 { 2456 byte *srcPix = srcRow + x * internal.bytes; 2457 for(int i = 0; i < 3; i++) 2458 { 2459 srcPix[i] = sRGBtoLinearTable[srcPix[i]]; 2460 } 2461 } 2462 } 2463 internal.unlockRect(); 2464 } 2465 } 2466 decodeEAC(Buffer & internal,Buffer & external,int nbChannels,bool isSigned)2467 void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned) 2468 { 2469 ASSERT(nbChannels == 1 || nbChannels == 2); 2470 2471 byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE); 2472 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2473 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED)); 2474 external.unlockRect(); 2475 2476 // FIXME: We convert EAC data to float, until signed short internal formats are supported 2477 // This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats 2478 const float normalization = isSigned ? (1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f)); 2479 for(int y = 0; y < internal.height; y++) 2480 { 2481 byte* srcRow = src + y * internal.pitchB; 2482 for(int x = internal.width - 1; x >= 0; x--) 2483 { 2484 int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes); 2485 float* dstPix = reinterpret_cast<float*>(srcPix); 2486 for(int c = nbChannels - 1; c >= 0; c--) 2487 { 2488 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f); 2489 } 2490 } 2491 } 2492 2493 internal.unlockRect(); 2494 } 2495 size(int width,int height,int depth,int border,int samples,Format format)2496 size_t Surface::size(int width, int height, int depth, int border, int samples, Format format) 2497 { 2498 samples = max(1, samples); 2499 2500 switch(format) 2501 { 2502 default: 2503 { 2504 uint64_t size = (uint64_t)sliceB(width, height, border, format, true) * depth * samples; 2505 2506 // We can only sample buffers smaller than 2 GiB, due to signed 32-bit offset calculations. 2507 // Force an out-of-memory if larger, or let the caller report an error. 2508 if(size >= 0x80000000u) 2509 { 2510 return std::numeric_limits<size_t>::max(); 2511 } 2512 2513 // Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes, 2514 // and stencil operations also read 8 bytes per four 8-bit stencil values, 2515 // so we have to allocate 4 extra bytes to avoid buffer overruns. 2516 // TODO(b/145229887): Eliminate if possible, or don't hard-code. 
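				// For example, a hypothetical 65536x8192 single-sample FORMAT_A8R8G8B8 surface is
				// exactly 2 GiB (65536 * 8192 * 4 bytes) and is therefore rejected by the check above.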
2517 return static_cast<size_t>(size) + 4; 2518 } 2519 case FORMAT_YV12_BT601: 2520 case FORMAT_YV12_BT709: 2521 case FORMAT_YV12_JFIF: 2522 { 2523 width += 2 * border; 2524 height += 2 * border; 2525 2526 size_t YStride = align<16>(width); 2527 size_t YSize = YStride * height; 2528 size_t CStride = align<16>(YStride / 2); 2529 size_t CSize = CStride * height / 2; 2530 2531 return YSize + 2 * CSize; 2532 } 2533 } 2534 } 2535 isStencil(Format format)2536 bool Surface::isStencil(Format format) 2537 { 2538 switch(format) 2539 { 2540 case FORMAT_D32: 2541 case FORMAT_D16: 2542 case FORMAT_D24X8: 2543 case FORMAT_D32F: 2544 case FORMAT_D32F_COMPLEMENTARY: 2545 case FORMAT_D32F_LOCKABLE: 2546 case FORMAT_D32F_SHADOW: 2547 return false; 2548 case FORMAT_D24S8: 2549 case FORMAT_D24FS8: 2550 case FORMAT_S8: 2551 case FORMAT_DF24S8: 2552 case FORMAT_DF16S8: 2553 case FORMAT_D32FS8_TEXTURE: 2554 case FORMAT_D32FS8_SHADOW: 2555 case FORMAT_D32FS8: 2556 case FORMAT_D32FS8_COMPLEMENTARY: 2557 case FORMAT_INTZ: 2558 return true; 2559 default: 2560 return false; 2561 } 2562 } 2563 isDepth(Format format)2564 bool Surface::isDepth(Format format) 2565 { 2566 switch(format) 2567 { 2568 case FORMAT_D32: 2569 case FORMAT_D16: 2570 case FORMAT_D24X8: 2571 case FORMAT_D24S8: 2572 case FORMAT_D24FS8: 2573 case FORMAT_D32F: 2574 case FORMAT_D32FS8: 2575 case FORMAT_D32F_COMPLEMENTARY: 2576 case FORMAT_D32FS8_COMPLEMENTARY: 2577 case FORMAT_D32F_LOCKABLE: 2578 case FORMAT_DF24S8: 2579 case FORMAT_DF16S8: 2580 case FORMAT_D32FS8_TEXTURE: 2581 case FORMAT_D32F_SHADOW: 2582 case FORMAT_D32FS8_SHADOW: 2583 case FORMAT_INTZ: 2584 return true; 2585 case FORMAT_S8: 2586 return false; 2587 default: 2588 return false; 2589 } 2590 } 2591 hasQuadLayout(Format format)2592 bool Surface::hasQuadLayout(Format format) 2593 { 2594 switch(format) 2595 { 2596 case FORMAT_D32: 2597 case FORMAT_D16: 2598 case FORMAT_D24X8: 2599 case FORMAT_D24S8: 2600 case FORMAT_D24FS8: 2601 case FORMAT_D32F: 2602 case FORMAT_D32FS8: 2603 case FORMAT_D32F_COMPLEMENTARY: 2604 case FORMAT_D32FS8_COMPLEMENTARY: 2605 case FORMAT_DF24S8: 2606 case FORMAT_DF16S8: 2607 case FORMAT_INTZ: 2608 case FORMAT_S8: 2609 case FORMAT_A8G8R8B8Q: 2610 case FORMAT_X8G8R8B8Q: 2611 return true; 2612 case FORMAT_D32F_LOCKABLE: 2613 case FORMAT_D32FS8_TEXTURE: 2614 case FORMAT_D32F_SHADOW: 2615 case FORMAT_D32FS8_SHADOW: 2616 default: 2617 break; 2618 } 2619 2620 return false; 2621 } 2622 isPalette(Format format)2623 bool Surface::isPalette(Format format) 2624 { 2625 switch(format) 2626 { 2627 case FORMAT_P8: 2628 case FORMAT_A8P8: 2629 return true; 2630 default: 2631 return false; 2632 } 2633 } 2634 isFloatFormat(Format format)2635 bool Surface::isFloatFormat(Format format) 2636 { 2637 switch(format) 2638 { 2639 case FORMAT_R5G6B5: 2640 case FORMAT_R8G8B8: 2641 case FORMAT_B8G8R8: 2642 case FORMAT_X8R8G8B8: 2643 case FORMAT_X8B8G8R8I: 2644 case FORMAT_X8B8G8R8: 2645 case FORMAT_A8R8G8B8: 2646 case FORMAT_SRGB8_X8: 2647 case FORMAT_SRGB8_A8: 2648 case FORMAT_A8B8G8R8I: 2649 case FORMAT_R8UI: 2650 case FORMAT_G8R8UI: 2651 case FORMAT_X8B8G8R8UI: 2652 case FORMAT_A8B8G8R8UI: 2653 case FORMAT_A8B8G8R8: 2654 case FORMAT_G8R8I: 2655 case FORMAT_G8R8: 2656 case FORMAT_A2B10G10R10: 2657 case FORMAT_A2B10G10R10UI: 2658 case FORMAT_R8_SNORM: 2659 case FORMAT_G8R8_SNORM: 2660 case FORMAT_X8B8G8R8_SNORM: 2661 case FORMAT_A8B8G8R8_SNORM: 2662 case FORMAT_R16I: 2663 case FORMAT_R16UI: 2664 case FORMAT_G16R16I: 2665 case FORMAT_G16R16UI: 2666 case FORMAT_G16R16: 2667 case 
FORMAT_X16B16G16R16I: 2668 case FORMAT_X16B16G16R16UI: 2669 case FORMAT_A16B16G16R16I: 2670 case FORMAT_A16B16G16R16UI: 2671 case FORMAT_A16B16G16R16: 2672 case FORMAT_V8U8: 2673 case FORMAT_Q8W8V8U8: 2674 case FORMAT_X8L8V8U8: 2675 case FORMAT_V16U16: 2676 case FORMAT_A16W16V16U16: 2677 case FORMAT_Q16W16V16U16: 2678 case FORMAT_A8: 2679 case FORMAT_R8I: 2680 case FORMAT_R8: 2681 case FORMAT_S8: 2682 case FORMAT_L8: 2683 case FORMAT_L16: 2684 case FORMAT_A8L8: 2685 case FORMAT_YV12_BT601: 2686 case FORMAT_YV12_BT709: 2687 case FORMAT_YV12_JFIF: 2688 case FORMAT_R32I: 2689 case FORMAT_R32UI: 2690 case FORMAT_G32R32I: 2691 case FORMAT_G32R32UI: 2692 case FORMAT_X32B32G32R32I: 2693 case FORMAT_X32B32G32R32UI: 2694 case FORMAT_A32B32G32R32I: 2695 case FORMAT_A32B32G32R32UI: 2696 return false; 2697 case FORMAT_R16F: 2698 case FORMAT_G16R16F: 2699 case FORMAT_B16G16R16F: 2700 case FORMAT_X16B16G16R16F: 2701 case FORMAT_A16B16G16R16F: 2702 case FORMAT_X16B16G16R16F_UNSIGNED: 2703 case FORMAT_R32F: 2704 case FORMAT_G32R32F: 2705 case FORMAT_B32G32R32F: 2706 case FORMAT_X32B32G32R32F: 2707 case FORMAT_A32B32G32R32F: 2708 case FORMAT_X32B32G32R32F_UNSIGNED: 2709 case FORMAT_D32F: 2710 case FORMAT_D32FS8: 2711 case FORMAT_D32F_COMPLEMENTARY: 2712 case FORMAT_D32FS8_COMPLEMENTARY: 2713 case FORMAT_D32F_LOCKABLE: 2714 case FORMAT_D32FS8_TEXTURE: 2715 case FORMAT_D32F_SHADOW: 2716 case FORMAT_D32FS8_SHADOW: 2717 case FORMAT_L16F: 2718 case FORMAT_A16L16F: 2719 case FORMAT_L32F: 2720 case FORMAT_A32L32F: 2721 return true; 2722 default: 2723 ASSERT(false); 2724 } 2725 2726 return false; 2727 } 2728 isUnsignedComponent(Format format,int component)2729 bool Surface::isUnsignedComponent(Format format, int component) 2730 { 2731 switch(format) 2732 { 2733 case FORMAT_NULL: 2734 case FORMAT_R5G6B5: 2735 case FORMAT_R8G8B8: 2736 case FORMAT_B8G8R8: 2737 case FORMAT_X8R8G8B8: 2738 case FORMAT_X8B8G8R8: 2739 case FORMAT_A8R8G8B8: 2740 case FORMAT_A8B8G8R8: 2741 case FORMAT_SRGB8_X8: 2742 case FORMAT_SRGB8_A8: 2743 case FORMAT_G8R8: 2744 case FORMAT_A2B10G10R10: 2745 case FORMAT_A2B10G10R10UI: 2746 case FORMAT_R16UI: 2747 case FORMAT_G16R16: 2748 case FORMAT_G16R16UI: 2749 case FORMAT_X16B16G16R16UI: 2750 case FORMAT_A16B16G16R16: 2751 case FORMAT_A16B16G16R16UI: 2752 case FORMAT_R32UI: 2753 case FORMAT_G32R32UI: 2754 case FORMAT_X32B32G32R32UI: 2755 case FORMAT_A32B32G32R32UI: 2756 case FORMAT_X32B32G32R32F_UNSIGNED: 2757 case FORMAT_R8UI: 2758 case FORMAT_G8R8UI: 2759 case FORMAT_X8B8G8R8UI: 2760 case FORMAT_A8B8G8R8UI: 2761 case FORMAT_D32F: 2762 case FORMAT_D32FS8: 2763 case FORMAT_D32F_COMPLEMENTARY: 2764 case FORMAT_D32FS8_COMPLEMENTARY: 2765 case FORMAT_D32F_LOCKABLE: 2766 case FORMAT_D32FS8_TEXTURE: 2767 case FORMAT_D32F_SHADOW: 2768 case FORMAT_D32FS8_SHADOW: 2769 case FORMAT_A8: 2770 case FORMAT_R8: 2771 case FORMAT_L8: 2772 case FORMAT_L16: 2773 case FORMAT_A8L8: 2774 case FORMAT_YV12_BT601: 2775 case FORMAT_YV12_BT709: 2776 case FORMAT_YV12_JFIF: 2777 return true; 2778 case FORMAT_A8B8G8R8I: 2779 case FORMAT_A16B16G16R16I: 2780 case FORMAT_A32B32G32R32I: 2781 case FORMAT_A8B8G8R8_SNORM: 2782 case FORMAT_Q8W8V8U8: 2783 case FORMAT_Q16W16V16U16: 2784 case FORMAT_A32B32G32R32F: 2785 return false; 2786 case FORMAT_R32F: 2787 case FORMAT_R8I: 2788 case FORMAT_R16I: 2789 case FORMAT_R32I: 2790 case FORMAT_R8_SNORM: 2791 return component >= 1; 2792 case FORMAT_V8U8: 2793 case FORMAT_X8L8V8U8: 2794 case FORMAT_V16U16: 2795 case FORMAT_G32R32F: 2796 case FORMAT_G8R8I: 2797 case FORMAT_G16R16I: 2798 case 
FORMAT_G32R32I: 2799 case FORMAT_G8R8_SNORM: 2800 return component >= 2; 2801 case FORMAT_A16W16V16U16: 2802 case FORMAT_B32G32R32F: 2803 case FORMAT_X32B32G32R32F: 2804 case FORMAT_X8B8G8R8I: 2805 case FORMAT_X16B16G16R16I: 2806 case FORMAT_X32B32G32R32I: 2807 case FORMAT_X8B8G8R8_SNORM: 2808 return component >= 3; 2809 default: 2810 ASSERT(false); 2811 } 2812 2813 return false; 2814 } 2815 isSRGBreadable(Format format)2816 bool Surface::isSRGBreadable(Format format) 2817 { 2818 // Keep in sync with Capabilities::isSRGBreadable 2819 switch(format) 2820 { 2821 case FORMAT_L8: 2822 case FORMAT_A8L8: 2823 case FORMAT_R8G8B8: 2824 case FORMAT_A8R8G8B8: 2825 case FORMAT_X8R8G8B8: 2826 case FORMAT_A8B8G8R8: 2827 case FORMAT_X8B8G8R8: 2828 case FORMAT_SRGB8_X8: 2829 case FORMAT_SRGB8_A8: 2830 case FORMAT_R5G6B5: 2831 case FORMAT_X1R5G5B5: 2832 case FORMAT_A1R5G5B5: 2833 case FORMAT_A4R4G4B4: 2834 case FORMAT_DXT1: 2835 case FORMAT_DXT3: 2836 case FORMAT_DXT5: 2837 case FORMAT_ATI1: 2838 case FORMAT_ATI2: 2839 return true; 2840 default: 2841 return false; 2842 } 2843 } 2844 isSRGBwritable(Format format)2845 bool Surface::isSRGBwritable(Format format) 2846 { 2847 // Keep in sync with Capabilities::isSRGBwritable 2848 switch(format) 2849 { 2850 case FORMAT_NULL: 2851 case FORMAT_A8R8G8B8: 2852 case FORMAT_X8R8G8B8: 2853 case FORMAT_A8B8G8R8: 2854 case FORMAT_X8B8G8R8: 2855 case FORMAT_SRGB8_X8: 2856 case FORMAT_SRGB8_A8: 2857 case FORMAT_R5G6B5: 2858 return true; 2859 default: 2860 return false; 2861 } 2862 } 2863 isSRGBformat(Format format)2864 bool Surface::isSRGBformat(Format format) 2865 { 2866 switch(format) 2867 { 2868 case FORMAT_SRGB8_X8: 2869 case FORMAT_SRGB8_A8: 2870 return true; 2871 default: 2872 return false; 2873 } 2874 } 2875 isCompressed(Format format)2876 bool Surface::isCompressed(Format format) 2877 { 2878 switch(format) 2879 { 2880 case FORMAT_DXT1: 2881 case FORMAT_DXT3: 2882 case FORMAT_DXT5: 2883 case FORMAT_ATI1: 2884 case FORMAT_ATI2: 2885 case FORMAT_ETC1: 2886 case FORMAT_R11_EAC: 2887 case FORMAT_SIGNED_R11_EAC: 2888 case FORMAT_RG11_EAC: 2889 case FORMAT_SIGNED_RG11_EAC: 2890 case FORMAT_RGB8_ETC2: 2891 case FORMAT_SRGB8_ETC2: 2892 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2893 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2894 case FORMAT_RGBA8_ETC2_EAC: 2895 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2896 return true; 2897 default: 2898 return false; 2899 } 2900 } 2901 isSignedNonNormalizedInteger(Format format)2902 bool Surface::isSignedNonNormalizedInteger(Format format) 2903 { 2904 switch(format) 2905 { 2906 case FORMAT_A8B8G8R8I: 2907 case FORMAT_X8B8G8R8I: 2908 case FORMAT_G8R8I: 2909 case FORMAT_R8I: 2910 case FORMAT_A16B16G16R16I: 2911 case FORMAT_X16B16G16R16I: 2912 case FORMAT_G16R16I: 2913 case FORMAT_R16I: 2914 case FORMAT_A32B32G32R32I: 2915 case FORMAT_X32B32G32R32I: 2916 case FORMAT_G32R32I: 2917 case FORMAT_R32I: 2918 return true; 2919 default: 2920 return false; 2921 } 2922 } 2923 isUnsignedNonNormalizedInteger(Format format)2924 bool Surface::isUnsignedNonNormalizedInteger(Format format) 2925 { 2926 switch(format) 2927 { 2928 case FORMAT_A8B8G8R8UI: 2929 case FORMAT_X8B8G8R8UI: 2930 case FORMAT_G8R8UI: 2931 case FORMAT_R8UI: 2932 case FORMAT_A16B16G16R16UI: 2933 case FORMAT_X16B16G16R16UI: 2934 case FORMAT_G16R16UI: 2935 case FORMAT_R16UI: 2936 case FORMAT_A32B32G32R32UI: 2937 case FORMAT_X32B32G32R32UI: 2938 case FORMAT_G32R32UI: 2939 case FORMAT_R32UI: 2940 return true; 2941 default: 2942 return false; 2943 } 2944 } 2945 isNonNormalizedInteger(Format 
format)2946 bool Surface::isNonNormalizedInteger(Format format) 2947 { 2948 return isSignedNonNormalizedInteger(format) || 2949 isUnsignedNonNormalizedInteger(format); 2950 } 2951 isNormalizedInteger(Format format)2952 bool Surface::isNormalizedInteger(Format format) 2953 { 2954 return !isFloatFormat(format) && 2955 !isNonNormalizedInteger(format) && 2956 !isCompressed(format) && 2957 !isDepth(format) && 2958 !isStencil(format); 2959 } 2960 componentCount(Format format)2961 int Surface::componentCount(Format format) 2962 { 2963 switch(format) 2964 { 2965 case FORMAT_R5G6B5: return 3; 2966 case FORMAT_X8R8G8B8: return 3; 2967 case FORMAT_X8B8G8R8I: return 3; 2968 case FORMAT_X8B8G8R8: return 3; 2969 case FORMAT_A8R8G8B8: return 4; 2970 case FORMAT_SRGB8_X8: return 3; 2971 case FORMAT_SRGB8_A8: return 4; 2972 case FORMAT_A8B8G8R8I: return 4; 2973 case FORMAT_A8B8G8R8: return 4; 2974 case FORMAT_G8R8I: return 2; 2975 case FORMAT_G8R8: return 2; 2976 case FORMAT_R8_SNORM: return 1; 2977 case FORMAT_G8R8_SNORM: return 2; 2978 case FORMAT_X8B8G8R8_SNORM:return 3; 2979 case FORMAT_A8B8G8R8_SNORM:return 4; 2980 case FORMAT_R8UI: return 1; 2981 case FORMAT_G8R8UI: return 2; 2982 case FORMAT_X8B8G8R8UI: return 3; 2983 case FORMAT_A8B8G8R8UI: return 4; 2984 case FORMAT_A2B10G10R10: return 4; 2985 case FORMAT_A2B10G10R10UI: return 4; 2986 case FORMAT_G16R16I: return 2; 2987 case FORMAT_G16R16UI: return 2; 2988 case FORMAT_G16R16: return 2; 2989 case FORMAT_G32R32I: return 2; 2990 case FORMAT_G32R32UI: return 2; 2991 case FORMAT_X16B16G16R16I: return 3; 2992 case FORMAT_X16B16G16R16UI: return 3; 2993 case FORMAT_A16B16G16R16I: return 4; 2994 case FORMAT_A16B16G16R16UI: return 4; 2995 case FORMAT_A16B16G16R16: return 4; 2996 case FORMAT_X32B32G32R32I: return 3; 2997 case FORMAT_X32B32G32R32UI: return 3; 2998 case FORMAT_A32B32G32R32I: return 4; 2999 case FORMAT_A32B32G32R32UI: return 4; 3000 case FORMAT_V8U8: return 2; 3001 case FORMAT_Q8W8V8U8: return 4; 3002 case FORMAT_X8L8V8U8: return 3; 3003 case FORMAT_V16U16: return 2; 3004 case FORMAT_A16W16V16U16: return 4; 3005 case FORMAT_Q16W16V16U16: return 4; 3006 case FORMAT_R32F: return 1; 3007 case FORMAT_G32R32F: return 2; 3008 case FORMAT_X32B32G32R32F: return 3; 3009 case FORMAT_A32B32G32R32F: return 4; 3010 case FORMAT_X32B32G32R32F_UNSIGNED: return 3; 3011 case FORMAT_D32F: return 1; 3012 case FORMAT_D32FS8: return 1; 3013 case FORMAT_D32F_LOCKABLE: return 1; 3014 case FORMAT_D32FS8_TEXTURE: return 1; 3015 case FORMAT_D32F_SHADOW: return 1; 3016 case FORMAT_D32FS8_SHADOW: return 1; 3017 case FORMAT_A8: return 1; 3018 case FORMAT_R8I: return 1; 3019 case FORMAT_R8: return 1; 3020 case FORMAT_R16I: return 1; 3021 case FORMAT_R16UI: return 1; 3022 case FORMAT_R32I: return 1; 3023 case FORMAT_R32UI: return 1; 3024 case FORMAT_L8: return 1; 3025 case FORMAT_L16: return 1; 3026 case FORMAT_A8L8: return 2; 3027 case FORMAT_YV12_BT601: return 3; 3028 case FORMAT_YV12_BT709: return 3; 3029 case FORMAT_YV12_JFIF: return 3; 3030 default: 3031 ASSERT(false); 3032 } 3033 3034 return 1; 3035 } 3036 allocateBuffer(int width,int height,int depth,int border,int samples,Format format)3037 void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format) 3038 { 3039 return allocate(size(width, height, depth, border, samples, format)); 3040 } 3041 memfill4(void * buffer,int pattern,int bytes)3042 void Surface::memfill4(void *buffer, int pattern, int bytes) 3043 { 3044 while((size_t)buffer & 0x1 && bytes >= 1) 3045 { 3046 
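		// Store one byte at a time until the pointer is 2-byte aligned; the loops below then
		// align it to 4 bytes (and, when SSE is available, 16 bytes) before the wide fill.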
*(char*)buffer = (char)pattern; 3047 (char*&)buffer += 1; 3048 bytes -= 1; 3049 } 3050 3051 while((size_t)buffer & 0x3 && bytes >= 2) 3052 { 3053 *(short*)buffer = (short)pattern; 3054 (short*&)buffer += 1; 3055 bytes -= 2; 3056 } 3057 3058 #if defined(__i386__) || defined(__x86_64__) 3059 if(CPUID::supportsSSE()) 3060 { 3061 while((size_t)buffer & 0xF && bytes >= 4) 3062 { 3063 *(int*)buffer = pattern; 3064 (int*&)buffer += 1; 3065 bytes -= 4; 3066 } 3067 3068 __m128 quad = _mm_set_ps1((float&)pattern); 3069 3070 float *pointer = (float*)buffer; 3071 int qxwords = bytes / 64; 3072 bytes -= qxwords * 64; 3073 3074 while(qxwords--) 3075 { 3076 _mm_stream_ps(pointer + 0, quad); 3077 _mm_stream_ps(pointer + 4, quad); 3078 _mm_stream_ps(pointer + 8, quad); 3079 _mm_stream_ps(pointer + 12, quad); 3080 3081 pointer += 16; 3082 } 3083 3084 buffer = pointer; 3085 } 3086 #endif 3087 3088 while(bytes >= 4) 3089 { 3090 *(int*)buffer = (int)pattern; 3091 (int*&)buffer += 1; 3092 bytes -= 4; 3093 } 3094 3095 while(bytes >= 2) 3096 { 3097 *(short*)buffer = (short)pattern; 3098 (short*&)buffer += 1; 3099 bytes -= 2; 3100 } 3101 3102 while(bytes >= 1) 3103 { 3104 *(char*)buffer = (char)pattern; 3105 (char*&)buffer += 1; 3106 bytes -= 1; 3107 } 3108 } 3109 sync()3110 void Surface::sync() 3111 { 3112 resource->lock(EXCLUSIVE); 3113 resource->unlock(); 3114 } 3115 isEntire(const Rect & rect) const3116 bool Surface::isEntire(const Rect& rect) const 3117 { 3118 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1); 3119 } 3120 getRect() const3121 Rect Surface::getRect() const 3122 { 3123 return Rect(0, 0, internal.width, internal.height); 3124 } 3125 clearDepth(float depth,int x0,int y0,int width,int height)3126 void Surface::clearDepth(float depth, int x0, int y0, int width, int height) 3127 { 3128 if(width == 0 || height == 0) 3129 { 3130 return; 3131 } 3132 3133 if(internal.format == FORMAT_NULL) 3134 { 3135 return; 3136 } 3137 3138 // Not overlapping 3139 if(x0 > internal.width) return; 3140 if(y0 > internal.height) return; 3141 if(x0 + width < 0) return; 3142 if(y0 + height < 0) return; 3143 3144 // Clip against dimensions 3145 if(x0 < 0) {width += x0; x0 = 0;} 3146 if(x0 + width > internal.width) width = internal.width - x0; 3147 if(y0 < 0) {height += y0; y0 = 0;} 3148 if(y0 + height > internal.height) height = internal.height - y0; 3149 3150 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 3151 const Lock lock = entire ? 
LOCK_DISCARD : LOCK_WRITEONLY; 3152 3153 int x1 = x0 + width; 3154 int y1 = y0 + height; 3155 3156 if(!hasQuadLayout(internal.format)) 3157 { 3158 float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC); 3159 3160 for(int z = 0; z < internal.samples; z++) 3161 { 3162 float *row = target; 3163 for(int y = y0; y < y1; y++) 3164 { 3165 memfill4(row, (int&)depth, width * sizeof(float)); 3166 row += internal.pitchP; 3167 } 3168 target += internal.sliceP; 3169 } 3170 3171 unlockInternal(); 3172 } 3173 else // Quad layout 3174 { 3175 if(complementaryDepthBuffer) 3176 { 3177 depth = 1 - depth; 3178 } 3179 3180 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC); 3181 3182 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3183 int oddX1 = (x1 & ~1) * 2; 3184 int evenX0 = ((x0 + 1) & ~1) * 2; 3185 int evenBytes = (oddX1 - evenX0) * sizeof(float); 3186 3187 for(int z = 0; z < internal.samples; z++) 3188 { 3189 for(int y = y0; y < y1; y++) 3190 { 3191 float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2; 3192 3193 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 3194 { 3195 if((x0 & 1) != 0) 3196 { 3197 target[oddX0 + 0] = depth; 3198 target[oddX0 + 2] = depth; 3199 } 3200 3201 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4) 3202 // { 3203 // target[x2 + 0] = depth; 3204 // target[x2 + 1] = depth; 3205 // target[x2 + 2] = depth; 3206 // target[x2 + 3] = depth; 3207 // } 3208 3209 // __asm 3210 // { 3211 // movss xmm0, depth 3212 // shufps xmm0, xmm0, 0x00 3213 // 3214 // mov eax, x0 3215 // add eax, 1 3216 // and eax, 0xFFFFFFFE 3217 // cmp eax, x1 3218 // jge qEnd 3219 // 3220 // mov edi, target 3221 // 3222 // qLoop: 3223 // movntps [edi+8*eax], xmm0 3224 // 3225 // add eax, 2 3226 // cmp eax, x1 3227 // jl qLoop 3228 // qEnd: 3229 // } 3230 3231 memfill4(&target[evenX0], (int&)depth, evenBytes); 3232 3233 if((x1 & 1) != 0) 3234 { 3235 target[oddX1 + 0] = depth; 3236 target[oddX1 + 2] = depth; 3237 } 3238 3239 y++; 3240 } 3241 else 3242 { 3243 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3244 { 3245 target[i] = depth; 3246 } 3247 } 3248 } 3249 3250 buffer += internal.sliceP; 3251 } 3252 3253 unlockInternal(); 3254 } 3255 } 3256 clearStencil(unsigned char s,unsigned char mask,int x0,int y0,int width,int height)3257 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3258 { 3259 if(mask == 0 || width == 0 || height == 0) 3260 { 3261 return; 3262 } 3263 3264 if(stencil.format == FORMAT_NULL) 3265 { 3266 return; 3267 } 3268 3269 // Not overlapping 3270 if(x0 > internal.width) return; 3271 if(y0 > internal.height) return; 3272 if(x0 + width < 0) return; 3273 if(y0 + height < 0) return; 3274 3275 // Clip against dimensions 3276 if(x0 < 0) {width += x0; x0 = 0;} 3277 if(x0 + width > internal.width) width = internal.width - x0; 3278 if(y0 < 0) {height += y0; y0 = 0;} 3279 if(y0 + height > internal.height) height = internal.height - y0; 3280 3281 int x1 = x0 + width; 3282 int y1 = y0 + height; 3283 3284 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3285 int oddX1 = (x1 & ~1) * 2; 3286 int evenX0 = ((x0 + 1) & ~1) * 2; 3287 int evenBytes = oddX1 - evenX0; 3288 3289 unsigned char maskedS = s & mask; 3290 unsigned char invMask = ~mask; 3291 unsigned int fill = maskedS; 3292 fill = fill | (fill << 8) | (fill << 16) | (fill << 24); 3293 3294 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC); 3295 3296 // Stencil buffers are assumed to use quad layout 3297 for(int z = 0; z < stencil.samples; z++) 3298 { 3299 for(int y = y0; y < 
y1; y++) 3300 { 3301 char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2; 3302 3303 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once 3304 { 3305 if((x0 & 1) != 0) 3306 { 3307 target[oddX0 + 0] = fill; 3308 target[oddX0 + 2] = fill; 3309 } 3310 3311 memfill4(&target[evenX0], fill, evenBytes); 3312 3313 if((x1 & 1) != 0) 3314 { 3315 target[oddX1 + 0] = fill; 3316 target[oddX1 + 2] = fill; 3317 } 3318 3319 y++; 3320 } 3321 else 3322 { 3323 for(int x = x0; x < x1; x++) 3324 { 3325 int i = (x & ~1) * 2 + (x & 1); 3326 target[i] = maskedS | (target[i] & invMask); 3327 } 3328 } 3329 } 3330 3331 buffer += stencil.sliceP; 3332 } 3333 3334 unlockStencil(); 3335 } 3336 fill(const Color<float> & color,int x0,int y0,int width,int height)3337 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height) 3338 { 3339 unsigned char *row; 3340 Buffer *buffer; 3341 3342 if(internal.dirty) 3343 { 3344 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3345 buffer = &internal; 3346 } 3347 else 3348 { 3349 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3350 buffer = &external; 3351 } 3352 3353 if(buffer->bytes <= 4) 3354 { 3355 int c; 3356 buffer->write(&c, color); 3357 3358 if(buffer->bytes <= 1) c = (c << 8) | c; 3359 if(buffer->bytes <= 2) c = (c << 16) | c; 3360 3361 for(int y = 0; y < height; y++) 3362 { 3363 memfill4(row, c, width * buffer->bytes); 3364 3365 row += buffer->pitchB; 3366 } 3367 } 3368 else // Generic 3369 { 3370 for(int y = 0; y < height; y++) 3371 { 3372 unsigned char *element = row; 3373 3374 for(int x = 0; x < width; x++) 3375 { 3376 buffer->write(element, color); 3377 3378 element += buffer->bytes; 3379 } 3380 3381 row += buffer->pitchB; 3382 } 3383 } 3384 3385 if(buffer == &internal) 3386 { 3387 unlockInternal(); 3388 } 3389 else 3390 { 3391 unlockExternal(); 3392 } 3393 } 3394 copyInternal(const Surface * source,int x,int y,float srcX,float srcY,bool filter)3395 void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter) 3396 { 3397 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3398 3399 sw::Color<float> color; 3400 3401 if(!filter) 3402 { 3403 color = source->internal.read((int)srcX, (int)srcY, 0); 3404 } 3405 else // Bilinear filtering 3406 { 3407 color = source->internal.sample(srcX, srcY, 0); 3408 } 3409 3410 internal.write(x, y, color); 3411 } 3412 copyInternal(const Surface * source,int x,int y,int z,float srcX,float srcY,float srcZ,bool filter)3413 void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter) 3414 { 3415 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3416 3417 sw::Color<float> color; 3418 3419 if(!filter) 3420 { 3421 color = source->internal.read((int)srcX, (int)srcY, int(srcZ)); 3422 } 3423 else // Bilinear filtering 3424 { 3425 color = source->internal.sample(srcX, srcY, srcZ); 3426 } 3427 3428 internal.write(x, y, z, color); 3429 } 3430 copyCubeEdge(Edge dstEdge,Surface * src,Edge srcEdge)3431 void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge) 3432 { 3433 Surface *dst = this; 3434 3435 // Figure out if the edges to be copied in reverse order respectively from one another 3436 // The copy should be reversed whenever the same edges are contiguous or if we're 3437 // copying top <-> right or bottom <-> left. 
This is explained by the layout, which is: 3438 // 3439 // | +y | 3440 // | -x | +z | +x | -z | 3441 // | -y | 3442 3443 bool reverse = (srcEdge == dstEdge) || 3444 ((srcEdge == TOP) && (dstEdge == RIGHT)) || 3445 ((srcEdge == RIGHT) && (dstEdge == TOP)) || 3446 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) || 3447 ((srcEdge == LEFT) && (dstEdge == BOTTOM)); 3448 3449 int srcBytes = src->bytes(src->Surface::getInternalFormat()); 3450 int srcPitch = src->getInternalPitchB(); 3451 int dstBytes = dst->bytes(dst->Surface::getInternalFormat()); 3452 int dstPitch = dst->getInternalPitchB(); 3453 3454 int srcW = src->getWidth(); 3455 int srcH = src->getHeight(); 3456 int dstW = dst->getWidth(); 3457 int dstH = dst->getHeight(); 3458 3459 ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes); 3460 3461 // Src is expressed in the regular [0, width-1], [0, height-1] space 3462 int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch; 3463 int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0)); 3464 3465 // Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space 3466 int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1); 3467 int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta); 3468 3469 char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart; 3470 char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart; 3471 3472 for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta) 3473 { 3474 memcpy(dstBuf, srcBuf, srcBytes); 3475 } 3476 3477 if(dstEdge == LEFT || dstEdge == RIGHT) 3478 { 3479 // TOP and BOTTOM are already set, let's average out the corners 3480 int x0 = (dstEdge == RIGHT) ? dstW : -1; 3481 int y0 = -1; 3482 int x1 = (dstEdge == RIGHT) ? 
dstW - 1 : 0; 3483 int y1 = 0; 3484 dst->computeCubeCorner(x0, y0, x1, y1); 3485 y0 = dstH; 3486 y1 = dstH - 1; 3487 dst->computeCubeCorner(x0, y0, x1, y1); 3488 } 3489 3490 src->unlockInternal(); 3491 dst->unlockInternal(); 3492 } 3493 computeCubeCorner(int x0,int y0,int x1,int y1)3494 void Surface::computeCubeCorner(int x0, int y0, int x1, int y1) 3495 { 3496 ASSERT(internal.lock != LOCK_UNLOCKED); 3497 3498 sw::Color<float> color = internal.read(x0, y1); 3499 color += internal.read(x1, y0); 3500 color += internal.read(x1, y1); 3501 color *= (1.0f / 3.0f); 3502 3503 internal.write(x0, y0, color); 3504 } 3505 hasStencil() const3506 bool Surface::hasStencil() const 3507 { 3508 return isStencil(external.format); 3509 } 3510 hasDepth() const3511 bool Surface::hasDepth() const 3512 { 3513 return isDepth(external.format); 3514 } 3515 hasPalette() const3516 bool Surface::hasPalette() const 3517 { 3518 return isPalette(external.format); 3519 } 3520 isRenderTarget() const3521 bool Surface::isRenderTarget() const 3522 { 3523 return renderTarget; 3524 } 3525 hasDirtyContents() const3526 bool Surface::hasDirtyContents() const 3527 { 3528 return dirtyContents; 3529 } 3530 markContentsClean()3531 void Surface::markContentsClean() 3532 { 3533 dirtyContents = false; 3534 } 3535 getResource()3536 Resource *Surface::getResource() 3537 { 3538 return resource; 3539 } 3540 identicalBuffers() const3541 bool Surface::identicalBuffers() const 3542 { 3543 return external.format == internal.format && 3544 external.width == internal.width && 3545 external.height == internal.height && 3546 external.depth == internal.depth && 3547 external.pitchB == internal.pitchB && 3548 external.sliceB == internal.sliceB && 3549 external.border == internal.border && 3550 external.samples == internal.samples; 3551 } 3552 selectInternalFormat(Format format) const3553 Format Surface::selectInternalFormat(Format format) const 3554 { 3555 switch(format) 3556 { 3557 case FORMAT_NULL: 3558 return FORMAT_NULL; 3559 case FORMAT_P8: 3560 case FORMAT_A8P8: 3561 case FORMAT_A4R4G4B4: 3562 case FORMAT_A1R5G5B5: 3563 case FORMAT_A8R3G3B2: 3564 return FORMAT_A8R8G8B8; 3565 case FORMAT_A8: 3566 return FORMAT_A8; 3567 case FORMAT_R8I: 3568 return FORMAT_R8I; 3569 case FORMAT_R8UI: 3570 return FORMAT_R8UI; 3571 case FORMAT_R8_SNORM: 3572 return FORMAT_R8_SNORM; 3573 case FORMAT_R8: 3574 return FORMAT_R8; 3575 case FORMAT_R16I: 3576 return FORMAT_R16I; 3577 case FORMAT_R16UI: 3578 return FORMAT_R16UI; 3579 case FORMAT_R32I: 3580 return FORMAT_R32I; 3581 case FORMAT_R32UI: 3582 return FORMAT_R32UI; 3583 case FORMAT_X16B16G16R16I: 3584 return FORMAT_X16B16G16R16I; 3585 case FORMAT_A16B16G16R16I: 3586 return FORMAT_A16B16G16R16I; 3587 case FORMAT_X16B16G16R16UI: 3588 return FORMAT_X16B16G16R16UI; 3589 case FORMAT_A16B16G16R16UI: 3590 return FORMAT_A16B16G16R16UI; 3591 case FORMAT_A2R10G10B10: 3592 case FORMAT_A2B10G10R10: 3593 case FORMAT_A16B16G16R16: 3594 return FORMAT_A16B16G16R16; 3595 case FORMAT_A2B10G10R10UI: 3596 return FORMAT_A16B16G16R16UI; 3597 case FORMAT_X32B32G32R32I: 3598 return FORMAT_X32B32G32R32I; 3599 case FORMAT_A32B32G32R32I: 3600 return FORMAT_A32B32G32R32I; 3601 case FORMAT_X32B32G32R32UI: 3602 return FORMAT_X32B32G32R32UI; 3603 case FORMAT_A32B32G32R32UI: 3604 return FORMAT_A32B32G32R32UI; 3605 case FORMAT_G8R8I: 3606 return FORMAT_G8R8I; 3607 case FORMAT_G8R8UI: 3608 return FORMAT_G8R8UI; 3609 case FORMAT_G8R8_SNORM: 3610 return FORMAT_G8R8_SNORM; 3611 case FORMAT_G8R8: 3612 return FORMAT_G8R8; 3613 case FORMAT_G16R16I: 3614 
return FORMAT_G16R16I; 3615 case FORMAT_G16R16UI: 3616 return FORMAT_G16R16UI; 3617 case FORMAT_G16R16: 3618 return FORMAT_G16R16; 3619 case FORMAT_G32R32I: 3620 return FORMAT_G32R32I; 3621 case FORMAT_G32R32UI: 3622 return FORMAT_G32R32UI; 3623 case FORMAT_A8R8G8B8: 3624 if(lockable || !quadLayoutEnabled) 3625 { 3626 return FORMAT_A8R8G8B8; 3627 } 3628 else 3629 { 3630 return FORMAT_A8G8R8B8Q; 3631 } 3632 case FORMAT_A8B8G8R8I: 3633 return FORMAT_A8B8G8R8I; 3634 case FORMAT_A8B8G8R8UI: 3635 return FORMAT_A8B8G8R8UI; 3636 case FORMAT_A8B8G8R8_SNORM: 3637 return FORMAT_A8B8G8R8_SNORM; 3638 case FORMAT_R5G5B5A1: 3639 case FORMAT_R4G4B4A4: 3640 case FORMAT_A8B8G8R8: 3641 return FORMAT_A8B8G8R8; 3642 case FORMAT_R5G6B5: 3643 return FORMAT_R5G6B5; 3644 case FORMAT_R3G3B2: 3645 case FORMAT_R8G8B8: 3646 case FORMAT_X4R4G4B4: 3647 case FORMAT_X1R5G5B5: 3648 case FORMAT_X8R8G8B8: 3649 if(lockable || !quadLayoutEnabled) 3650 { 3651 return FORMAT_X8R8G8B8; 3652 } 3653 else 3654 { 3655 return FORMAT_X8G8R8B8Q; 3656 } 3657 case FORMAT_X8B8G8R8I: 3658 return FORMAT_X8B8G8R8I; 3659 case FORMAT_X8B8G8R8UI: 3660 return FORMAT_X8B8G8R8UI; 3661 case FORMAT_X8B8G8R8_SNORM: 3662 return FORMAT_X8B8G8R8_SNORM; 3663 case FORMAT_B8G8R8: 3664 case FORMAT_X8B8G8R8: 3665 return FORMAT_X8B8G8R8; 3666 case FORMAT_SRGB8_X8: 3667 return FORMAT_SRGB8_X8; 3668 case FORMAT_SRGB8_A8: 3669 return FORMAT_SRGB8_A8; 3670 // Compressed formats 3671 case FORMAT_DXT1: 3672 case FORMAT_DXT3: 3673 case FORMAT_DXT5: 3674 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3675 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3676 case FORMAT_RGBA8_ETC2_EAC: 3677 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3678 return FORMAT_A8R8G8B8; 3679 case FORMAT_ATI1: 3680 return FORMAT_R8; 3681 case FORMAT_R11_EAC: 3682 case FORMAT_SIGNED_R11_EAC: 3683 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient 3684 case FORMAT_ATI2: 3685 return FORMAT_G8R8; 3686 case FORMAT_RG11_EAC: 3687 case FORMAT_SIGNED_RG11_EAC: 3688 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient 3689 case FORMAT_ETC1: 3690 case FORMAT_RGB8_ETC2: 3691 case FORMAT_SRGB8_ETC2: 3692 return FORMAT_X8R8G8B8; 3693 // Bumpmap formats 3694 case FORMAT_V8U8: return FORMAT_V8U8; 3695 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8; 3696 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8; 3697 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8; 3698 case FORMAT_V16U16: return FORMAT_V16U16; 3699 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16; 3700 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16; 3701 // Floating-point formats 3702 case FORMAT_A16F: return FORMAT_A32B32G32R32F; 3703 case FORMAT_R16F: return FORMAT_R32F; 3704 case FORMAT_G16R16F: return FORMAT_G32R32F; 3705 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F; 3706 case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F; 3707 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; 3708 case FORMAT_X16B16G16R16F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED; 3709 case FORMAT_A32F: return FORMAT_A32B32G32R32F; 3710 case FORMAT_R32F: return FORMAT_R32F; 3711 case FORMAT_G32R32F: return FORMAT_G32R32F; 3712 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F; 3713 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F; 3714 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F; 3715 case FORMAT_X32B32G32R32F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED; 3716 // Luminance formats 3717 case FORMAT_L8: return FORMAT_L8; 3718 case FORMAT_A4L4: return FORMAT_A8L8; 3719 case FORMAT_L16: return FORMAT_L16; 3720 case 
FORMAT_A8L8: return FORMAT_A8L8; 3721 case FORMAT_L16F: return FORMAT_X32B32G32R32F; 3722 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F; 3723 case FORMAT_L32F: return FORMAT_X32B32G32R32F; 3724 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F; 3725 // Depth/stencil formats 3726 case FORMAT_D16: 3727 case FORMAT_D32: 3728 case FORMAT_D24X8: 3729 if(hasParent) // Texture 3730 { 3731 return FORMAT_D32F_SHADOW; 3732 } 3733 else if(complementaryDepthBuffer) 3734 { 3735 return FORMAT_D32F_COMPLEMENTARY; 3736 } 3737 else 3738 { 3739 return FORMAT_D32F; 3740 } 3741 case FORMAT_D24S8: 3742 case FORMAT_D24FS8: 3743 if(hasParent) // Texture 3744 { 3745 return FORMAT_D32FS8_SHADOW; 3746 } 3747 else if(complementaryDepthBuffer) 3748 { 3749 return FORMAT_D32FS8_COMPLEMENTARY; 3750 } 3751 else 3752 { 3753 return FORMAT_D32FS8; 3754 } 3755 case FORMAT_D32F: return FORMAT_D32F; 3756 case FORMAT_D32FS8: return FORMAT_D32FS8; 3757 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE; 3758 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE; 3759 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; 3760 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; 3761 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; 3762 case FORMAT_S8: return FORMAT_S8; 3763 // YUV formats 3764 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; 3765 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; 3766 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; 3767 default: 3768 ASSERT(false); 3769 } 3770 3771 return FORMAT_NULL; 3772 } 3773 setTexturePalette(unsigned int * palette)3774 void Surface::setTexturePalette(unsigned int *palette) 3775 { 3776 Surface::palette = palette; 3777 Surface::paletteID++; 3778 } 3779 resolve()3780 void Surface::resolve() 3781 { 3782 if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL) 3783 { 3784 return; 3785 } 3786 3787 ASSERT(internal.depth == 1); // Unimplemented 3788 3789 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE); 3790 3791 int width = internal.width; 3792 int height = internal.height; 3793 int pitch = internal.pitchB; 3794 int slice = internal.sliceB; 3795 3796 unsigned char *source0 = (unsigned char*)source; 3797 unsigned char *source1 = source0 + slice; 3798 unsigned char *source2 = source1 + slice; 3799 unsigned char *source3 = source2 + slice; 3800 unsigned char *source4 = source3 + slice; 3801 unsigned char *source5 = source4 + slice; 3802 unsigned char *source6 = source5 + slice; 3803 unsigned char *source7 = source6 + slice; 3804 unsigned char *source8 = source7 + slice; 3805 unsigned char *source9 = source8 + slice; 3806 unsigned char *sourceA = source9 + slice; 3807 unsigned char *sourceB = sourceA + slice; 3808 unsigned char *sourceC = sourceB + slice; 3809 unsigned char *sourceD = sourceC + slice; 3810 unsigned char *sourceE = sourceD + slice; 3811 unsigned char *sourceF = sourceE + slice; 3812 3813 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || 3814 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 || 3815 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8) 3816 { 3817 #if defined(__i386__) || defined(__x86_64__) 3818 if(CPUID::supportsSSE2() && (width % 4) == 0) 3819 { 3820 if(internal.samples == 2) 3821 { 3822 for(int y = 0; y < height; y++) 3823 { 3824 for(int x = 0; x < width; x += 4) 3825 { 3826 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3827 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3828 3829 c0 = _mm_avg_epu8(c0, c1); 
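							// Multisample resolve: average the per-sample color planes into sample 0,
							// sixteen bytes (four 32-bit pixels) at a time. _mm_avg_epu8 computes
							// (a + b + 1) >> 1 per byte, and the 4x/8x/16x cases below apply the same
							// operation as a pairwise reduction tree, e.g. avg(avg(c0, c1), avg(c2, c3)).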
3830 3831 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3832 } 3833 3834 source0 += pitch; 3835 source1 += pitch; 3836 } 3837 } 3838 else if(internal.samples == 4) 3839 { 3840 for(int y = 0; y < height; y++) 3841 { 3842 for(int x = 0; x < width; x += 4) 3843 { 3844 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3845 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3846 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3847 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3848 3849 c0 = _mm_avg_epu8(c0, c1); 3850 c2 = _mm_avg_epu8(c2, c3); 3851 c0 = _mm_avg_epu8(c0, c2); 3852 3853 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3854 } 3855 3856 source0 += pitch; 3857 source1 += pitch; 3858 source2 += pitch; 3859 source3 += pitch; 3860 } 3861 } 3862 else if(internal.samples == 8) 3863 { 3864 for(int y = 0; y < height; y++) 3865 { 3866 for(int x = 0; x < width; x += 4) 3867 { 3868 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3869 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3870 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3871 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3872 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3873 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3874 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3875 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3876 3877 c0 = _mm_avg_epu8(c0, c1); 3878 c2 = _mm_avg_epu8(c2, c3); 3879 c4 = _mm_avg_epu8(c4, c5); 3880 c6 = _mm_avg_epu8(c6, c7); 3881 c0 = _mm_avg_epu8(c0, c2); 3882 c4 = _mm_avg_epu8(c4, c6); 3883 c0 = _mm_avg_epu8(c0, c4); 3884 3885 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3886 } 3887 3888 source0 += pitch; 3889 source1 += pitch; 3890 source2 += pitch; 3891 source3 += pitch; 3892 source4 += pitch; 3893 source5 += pitch; 3894 source6 += pitch; 3895 source7 += pitch; 3896 } 3897 } 3898 else if(internal.samples == 16) 3899 { 3900 for(int y = 0; y < height; y++) 3901 { 3902 for(int x = 0; x < width; x += 4) 3903 { 3904 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3905 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3906 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3907 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3908 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3909 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3910 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3911 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3912 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 3913 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 3914 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 3915 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 3916 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 3917 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 3918 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 3919 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 3920 3921 c0 = _mm_avg_epu8(c0, c1); 3922 c2 = _mm_avg_epu8(c2, c3); 3923 c4 = _mm_avg_epu8(c4, c5); 3924 c6 = _mm_avg_epu8(c6, c7); 3925 c8 = _mm_avg_epu8(c8, c9); 3926 cA = _mm_avg_epu8(cA, cB); 3927 cC = _mm_avg_epu8(cC, cD); 3928 cE = _mm_avg_epu8(cE, cF); 3929 c0 = _mm_avg_epu8(c0, c2); 3930 c4 = _mm_avg_epu8(c4, c6); 3931 c8 = _mm_avg_epu8(c8, cA); 3932 cC = _mm_avg_epu8(cC, cE); 3933 c0 = _mm_avg_epu8(c0, c4); 3934 c8 = _mm_avg_epu8(c8, cC); 3935 c0 = _mm_avg_epu8(c0, 
c8); 3936 3937 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3938 } 3939 3940 source0 += pitch; 3941 source1 += pitch; 3942 source2 += pitch; 3943 source3 += pitch; 3944 source4 += pitch; 3945 source5 += pitch; 3946 source6 += pitch; 3947 source7 += pitch; 3948 source8 += pitch; 3949 source9 += pitch; 3950 sourceA += pitch; 3951 sourceB += pitch; 3952 sourceC += pitch; 3953 sourceD += pitch; 3954 sourceE += pitch; 3955 sourceF += pitch; 3956 } 3957 } 3958 else ASSERT(false); 3959 } 3960 else 3961 #endif 3962 { 3963 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 3964 3965 if(internal.samples == 2) 3966 { 3967 for(int y = 0; y < height; y++) 3968 { 3969 for(int x = 0; x < width; x++) 3970 { 3971 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3972 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3973 3974 c0 = AVERAGE(c0, c1); 3975 3976 *(unsigned int*)(source0 + 4 * x) = c0; 3977 } 3978 3979 source0 += pitch; 3980 source1 += pitch; 3981 } 3982 } 3983 else if(internal.samples == 4) 3984 { 3985 for(int y = 0; y < height; y++) 3986 { 3987 for(int x = 0; x < width; x++) 3988 { 3989 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3990 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3991 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3992 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3993 3994 c0 = AVERAGE(c0, c1); 3995 c2 = AVERAGE(c2, c3); 3996 c0 = AVERAGE(c0, c2); 3997 3998 *(unsigned int*)(source0 + 4 * x) = c0; 3999 } 4000 4001 source0 += pitch; 4002 source1 += pitch; 4003 source2 += pitch; 4004 source3 += pitch; 4005 } 4006 } 4007 else if(internal.samples == 8) 4008 { 4009 for(int y = 0; y < height; y++) 4010 { 4011 for(int x = 0; x < width; x++) 4012 { 4013 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4014 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4015 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4016 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4017 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4018 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4019 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4020 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4021 4022 c0 = AVERAGE(c0, c1); 4023 c2 = AVERAGE(c2, c3); 4024 c4 = AVERAGE(c4, c5); 4025 c6 = AVERAGE(c6, c7); 4026 c0 = AVERAGE(c0, c2); 4027 c4 = AVERAGE(c4, c6); 4028 c0 = AVERAGE(c0, c4); 4029 4030 *(unsigned int*)(source0 + 4 * x) = c0; 4031 } 4032 4033 source0 += pitch; 4034 source1 += pitch; 4035 source2 += pitch; 4036 source3 += pitch; 4037 source4 += pitch; 4038 source5 += pitch; 4039 source6 += pitch; 4040 source7 += pitch; 4041 } 4042 } 4043 else if(internal.samples == 16) 4044 { 4045 for(int y = 0; y < height; y++) 4046 { 4047 for(int x = 0; x < width; x++) 4048 { 4049 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4050 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4051 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4052 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4053 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4054 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4055 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4056 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4057 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4058 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4059 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4060 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4061 unsigned int cC = *(unsigned 
int*)(sourceC + 4 * x); 4062 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4063 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4064 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4065 4066 c0 = AVERAGE(c0, c1); 4067 c2 = AVERAGE(c2, c3); 4068 c4 = AVERAGE(c4, c5); 4069 c6 = AVERAGE(c6, c7); 4070 c8 = AVERAGE(c8, c9); 4071 cA = AVERAGE(cA, cB); 4072 cC = AVERAGE(cC, cD); 4073 cE = AVERAGE(cE, cF); 4074 c0 = AVERAGE(c0, c2); 4075 c4 = AVERAGE(c4, c6); 4076 c8 = AVERAGE(c8, cA); 4077 cC = AVERAGE(cC, cE); 4078 c0 = AVERAGE(c0, c4); 4079 c8 = AVERAGE(c8, cC); 4080 c0 = AVERAGE(c0, c8); 4081 4082 *(unsigned int*)(source0 + 4 * x) = c0; 4083 } 4084 4085 source0 += pitch; 4086 source1 += pitch; 4087 source2 += pitch; 4088 source3 += pitch; 4089 source4 += pitch; 4090 source5 += pitch; 4091 source6 += pitch; 4092 source7 += pitch; 4093 source8 += pitch; 4094 source9 += pitch; 4095 sourceA += pitch; 4096 sourceB += pitch; 4097 sourceC += pitch; 4098 sourceD += pitch; 4099 sourceE += pitch; 4100 sourceF += pitch; 4101 } 4102 } 4103 else ASSERT(false); 4104 4105 #undef AVERAGE 4106 } 4107 } 4108 else if(internal.format == FORMAT_G16R16) 4109 { 4110 4111 #if defined(__i386__) || defined(__x86_64__) 4112 if(CPUID::supportsSSE2() && (width % 4) == 0) 4113 { 4114 if(internal.samples == 2) 4115 { 4116 for(int y = 0; y < height; y++) 4117 { 4118 for(int x = 0; x < width; x += 4) 4119 { 4120 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4121 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4122 4123 c0 = _mm_avg_epu16(c0, c1); 4124 4125 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4126 } 4127 4128 source0 += pitch; 4129 source1 += pitch; 4130 } 4131 } 4132 else if(internal.samples == 4) 4133 { 4134 for(int y = 0; y < height; y++) 4135 { 4136 for(int x = 0; x < width; x += 4) 4137 { 4138 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4139 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4140 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4141 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4142 4143 c0 = _mm_avg_epu16(c0, c1); 4144 c2 = _mm_avg_epu16(c2, c3); 4145 c0 = _mm_avg_epu16(c0, c2); 4146 4147 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4148 } 4149 4150 source0 += pitch; 4151 source1 += pitch; 4152 source2 += pitch; 4153 source3 += pitch; 4154 } 4155 } 4156 else if(internal.samples == 8) 4157 { 4158 for(int y = 0; y < height; y++) 4159 { 4160 for(int x = 0; x < width; x += 4) 4161 { 4162 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4163 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4164 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4165 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4166 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4167 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4168 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4169 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4170 4171 c0 = _mm_avg_epu16(c0, c1); 4172 c2 = _mm_avg_epu16(c2, c3); 4173 c4 = _mm_avg_epu16(c4, c5); 4174 c6 = _mm_avg_epu16(c6, c7); 4175 c0 = _mm_avg_epu16(c0, c2); 4176 c4 = _mm_avg_epu16(c4, c6); 4177 c0 = _mm_avg_epu16(c0, c4); 4178 4179 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4180 } 4181 4182 source0 += pitch; 4183 source1 += pitch; 4184 source2 += pitch; 4185 source3 += pitch; 4186 source4 += pitch; 4187 source5 += pitch; 4188 source6 += pitch; 4189 source7 += pitch; 4190 } 4191 } 4192 else if(internal.samples == 16) 
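					// 16x G16R16: same pairwise reduction tree as the 2x/4x/8x cases above,
					// using _mm_avg_epu16 because each pixel holds two 16-bit channels.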
4193 { 4194 for(int y = 0; y < height; y++) 4195 { 4196 for(int x = 0; x < width; x += 4) 4197 { 4198 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4199 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4200 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4201 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4202 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4203 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4204 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4205 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4206 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4207 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4208 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4209 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4210 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4211 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4212 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4213 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4214 4215 c0 = _mm_avg_epu16(c0, c1); 4216 c2 = _mm_avg_epu16(c2, c3); 4217 c4 = _mm_avg_epu16(c4, c5); 4218 c6 = _mm_avg_epu16(c6, c7); 4219 c8 = _mm_avg_epu16(c8, c9); 4220 cA = _mm_avg_epu16(cA, cB); 4221 cC = _mm_avg_epu16(cC, cD); 4222 cE = _mm_avg_epu16(cE, cF); 4223 c0 = _mm_avg_epu16(c0, c2); 4224 c4 = _mm_avg_epu16(c4, c6); 4225 c8 = _mm_avg_epu16(c8, cA); 4226 cC = _mm_avg_epu16(cC, cE); 4227 c0 = _mm_avg_epu16(c0, c4); 4228 c8 = _mm_avg_epu16(c8, cC); 4229 c0 = _mm_avg_epu16(c0, c8); 4230 4231 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4232 } 4233 4234 source0 += pitch; 4235 source1 += pitch; 4236 source2 += pitch; 4237 source3 += pitch; 4238 source4 += pitch; 4239 source5 += pitch; 4240 source6 += pitch; 4241 source7 += pitch; 4242 source8 += pitch; 4243 source9 += pitch; 4244 sourceA += pitch; 4245 sourceB += pitch; 4246 sourceC += pitch; 4247 sourceD += pitch; 4248 sourceE += pitch; 4249 sourceF += pitch; 4250 } 4251 } 4252 else ASSERT(false); 4253 } 4254 else 4255 #endif 4256 { 4257 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4258 4259 if(internal.samples == 2) 4260 { 4261 for(int y = 0; y < height; y++) 4262 { 4263 for(int x = 0; x < width; x++) 4264 { 4265 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4266 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4267 4268 c0 = AVERAGE(c0, c1); 4269 4270 *(unsigned int*)(source0 + 4 * x) = c0; 4271 } 4272 4273 source0 += pitch; 4274 source1 += pitch; 4275 } 4276 } 4277 else if(internal.samples == 4) 4278 { 4279 for(int y = 0; y < height; y++) 4280 { 4281 for(int x = 0; x < width; x++) 4282 { 4283 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4284 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4285 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4286 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4287 4288 c0 = AVERAGE(c0, c1); 4289 c2 = AVERAGE(c2, c3); 4290 c0 = AVERAGE(c0, c2); 4291 4292 *(unsigned int*)(source0 + 4 * x) = c0; 4293 } 4294 4295 source0 += pitch; 4296 source1 += pitch; 4297 source2 += pitch; 4298 source3 += pitch; 4299 } 4300 } 4301 else if(internal.samples == 8) 4302 { 4303 for(int y = 0; y < height; y++) 4304 { 4305 for(int x = 0; x < width; x++) 4306 { 4307 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4308 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4309 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4310 
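							// The AVERAGE macro above is the scalar counterpart of _mm_avg_epu16: using
							// the identity x + y == ((x & y) << 1) + (x ^ y), it averages both 16-bit
							// lanes of a G16R16 pixel at once, masking with 0x7FFF7FFF after the shift so
							// no bit crosses the lane boundary, and adding (x ^ y) & 0x00010001 so each
							// lane rounds up like the SSE instruction.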
unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4311 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4312 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4313 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4314 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4315 4316 c0 = AVERAGE(c0, c1); 4317 c2 = AVERAGE(c2, c3); 4318 c4 = AVERAGE(c4, c5); 4319 c6 = AVERAGE(c6, c7); 4320 c0 = AVERAGE(c0, c2); 4321 c4 = AVERAGE(c4, c6); 4322 c0 = AVERAGE(c0, c4); 4323 4324 *(unsigned int*)(source0 + 4 * x) = c0; 4325 } 4326 4327 source0 += pitch; 4328 source1 += pitch; 4329 source2 += pitch; 4330 source3 += pitch; 4331 source4 += pitch; 4332 source5 += pitch; 4333 source6 += pitch; 4334 source7 += pitch; 4335 } 4336 } 4337 else if(internal.samples == 16) 4338 { 4339 for(int y = 0; y < height; y++) 4340 { 4341 for(int x = 0; x < width; x++) 4342 { 4343 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4344 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4345 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4346 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4347 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4348 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4349 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4350 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4351 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4352 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4353 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4354 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4355 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4356 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4357 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4358 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4359 4360 c0 = AVERAGE(c0, c1); 4361 c2 = AVERAGE(c2, c3); 4362 c4 = AVERAGE(c4, c5); 4363 c6 = AVERAGE(c6, c7); 4364 c8 = AVERAGE(c8, c9); 4365 cA = AVERAGE(cA, cB); 4366 cC = AVERAGE(cC, cD); 4367 cE = AVERAGE(cE, cF); 4368 c0 = AVERAGE(c0, c2); 4369 c4 = AVERAGE(c4, c6); 4370 c8 = AVERAGE(c8, cA); 4371 cC = AVERAGE(cC, cE); 4372 c0 = AVERAGE(c0, c4); 4373 c8 = AVERAGE(c8, cC); 4374 c0 = AVERAGE(c0, c8); 4375 4376 *(unsigned int*)(source0 + 4 * x) = c0; 4377 } 4378 4379 source0 += pitch; 4380 source1 += pitch; 4381 source2 += pitch; 4382 source3 += pitch; 4383 source4 += pitch; 4384 source5 += pitch; 4385 source6 += pitch; 4386 source7 += pitch; 4387 source8 += pitch; 4388 source9 += pitch; 4389 sourceA += pitch; 4390 sourceB += pitch; 4391 sourceC += pitch; 4392 sourceD += pitch; 4393 sourceE += pitch; 4394 sourceF += pitch; 4395 } 4396 } 4397 else ASSERT(false); 4398 4399 #undef AVERAGE 4400 } 4401 } 4402 else if(internal.format == FORMAT_A16B16G16R16) 4403 { 4404 #if defined(__i386__) || defined(__x86_64__) 4405 if(CPUID::supportsSSE2() && (width % 2) == 0) 4406 { 4407 if(internal.samples == 2) 4408 { 4409 for(int y = 0; y < height; y++) 4410 { 4411 for(int x = 0; x < width; x += 2) 4412 { 4413 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4414 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4415 4416 c0 = _mm_avg_epu16(c0, c1); 4417 4418 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4419 } 4420 4421 source0 += pitch; 4422 source1 += pitch; 4423 } 4424 } 4425 else if(internal.samples == 4) 4426 { 4427 for(int y = 0; y < height; y++) 4428 { 4429 for(int x = 0; x < width; x += 2) 4430 { 4431 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4432 __m128i c1 = 
_mm_load_si128((__m128i*)(source1 + 8 * x)); 4433 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4434 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4435 4436 c0 = _mm_avg_epu16(c0, c1); 4437 c2 = _mm_avg_epu16(c2, c3); 4438 c0 = _mm_avg_epu16(c0, c2); 4439 4440 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4441 } 4442 4443 source0 += pitch; 4444 source1 += pitch; 4445 source2 += pitch; 4446 source3 += pitch; 4447 } 4448 } 4449 else if(internal.samples == 8) 4450 { 4451 for(int y = 0; y < height; y++) 4452 { 4453 for(int x = 0; x < width; x += 2) 4454 { 4455 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4456 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4457 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4458 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4459 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4460 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4461 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4462 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4463 4464 c0 = _mm_avg_epu16(c0, c1); 4465 c2 = _mm_avg_epu16(c2, c3); 4466 c4 = _mm_avg_epu16(c4, c5); 4467 c6 = _mm_avg_epu16(c6, c7); 4468 c0 = _mm_avg_epu16(c0, c2); 4469 c4 = _mm_avg_epu16(c4, c6); 4470 c0 = _mm_avg_epu16(c0, c4); 4471 4472 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4473 } 4474 4475 source0 += pitch; 4476 source1 += pitch; 4477 source2 += pitch; 4478 source3 += pitch; 4479 source4 += pitch; 4480 source5 += pitch; 4481 source6 += pitch; 4482 source7 += pitch; 4483 } 4484 } 4485 else if(internal.samples == 16) 4486 { 4487 for(int y = 0; y < height; y++) 4488 { 4489 for(int x = 0; x < width; x += 2) 4490 { 4491 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4492 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4493 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4494 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4495 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4496 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4497 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4498 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4499 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4500 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4501 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4502 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4503 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4504 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4505 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4506 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4507 4508 c0 = _mm_avg_epu16(c0, c1); 4509 c2 = _mm_avg_epu16(c2, c3); 4510 c4 = _mm_avg_epu16(c4, c5); 4511 c6 = _mm_avg_epu16(c6, c7); 4512 c8 = _mm_avg_epu16(c8, c9); 4513 cA = _mm_avg_epu16(cA, cB); 4514 cC = _mm_avg_epu16(cC, cD); 4515 cE = _mm_avg_epu16(cE, cF); 4516 c0 = _mm_avg_epu16(c0, c2); 4517 c4 = _mm_avg_epu16(c4, c6); 4518 c8 = _mm_avg_epu16(c8, cA); 4519 cC = _mm_avg_epu16(cC, cE); 4520 c0 = _mm_avg_epu16(c0, c4); 4521 c8 = _mm_avg_epu16(c8, cC); 4522 c0 = _mm_avg_epu16(c0, c8); 4523 4524 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4525 } 4526 4527 source0 += pitch; 4528 source1 += pitch; 4529 source2 += pitch; 4530 source3 += pitch; 4531 source4 += pitch; 4532 source5 += pitch; 4533 source6 += pitch; 4534 source7 += pitch; 4535 source8 += pitch; 4536 source9 += pitch; 4537 sourceA 
+= pitch; 4538 sourceB += pitch; 4539 sourceC += pitch; 4540 sourceD += pitch; 4541 sourceE += pitch; 4542 sourceF += pitch; 4543 } 4544 } 4545 else ASSERT(false); 4546 } 4547 else 4548 #endif 4549 { 4550 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4551 4552 if(internal.samples == 2) 4553 { 4554 for(int y = 0; y < height; y++) 4555 { 4556 for(int x = 0; x < 2 * width; x++) 4557 { 4558 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4559 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4560 4561 c0 = AVERAGE(c0, c1); 4562 4563 *(unsigned int*)(source0 + 4 * x) = c0; 4564 } 4565 4566 source0 += pitch; 4567 source1 += pitch; 4568 } 4569 } 4570 else if(internal.samples == 4) 4571 { 4572 for(int y = 0; y < height; y++) 4573 { 4574 for(int x = 0; x < 2 * width; x++) 4575 { 4576 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4577 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4578 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4579 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4580 4581 c0 = AVERAGE(c0, c1); 4582 c2 = AVERAGE(c2, c3); 4583 c0 = AVERAGE(c0, c2); 4584 4585 *(unsigned int*)(source0 + 4 * x) = c0; 4586 } 4587 4588 source0 += pitch; 4589 source1 += pitch; 4590 source2 += pitch; 4591 source3 += pitch; 4592 } 4593 } 4594 else if(internal.samples == 8) 4595 { 4596 for(int y = 0; y < height; y++) 4597 { 4598 for(int x = 0; x < 2 * width; x++) 4599 { 4600 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4601 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4602 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4603 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4604 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4605 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4606 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4607 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4608 4609 c0 = AVERAGE(c0, c1); 4610 c2 = AVERAGE(c2, c3); 4611 c4 = AVERAGE(c4, c5); 4612 c6 = AVERAGE(c6, c7); 4613 c0 = AVERAGE(c0, c2); 4614 c4 = AVERAGE(c4, c6); 4615 c0 = AVERAGE(c0, c4); 4616 4617 *(unsigned int*)(source0 + 4 * x) = c0; 4618 } 4619 4620 source0 += pitch; 4621 source1 += pitch; 4622 source2 += pitch; 4623 source3 += pitch; 4624 source4 += pitch; 4625 source5 += pitch; 4626 source6 += pitch; 4627 source7 += pitch; 4628 } 4629 } 4630 else if(internal.samples == 16) 4631 { 4632 for(int y = 0; y < height; y++) 4633 { 4634 for(int x = 0; x < 2 * width; x++) 4635 { 4636 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4637 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4638 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4639 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4640 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4641 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4642 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4643 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4644 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4645 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4646 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4647 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4648 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4649 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4650 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4651 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4652 4653 c0 = AVERAGE(c0, c1); 4654 c2 = AVERAGE(c2, c3); 4655 c4 = AVERAGE(c4, c5); 4656 c6 = AVERAGE(c6, 
c7); 4657 c8 = AVERAGE(c8, c9); 4658 cA = AVERAGE(cA, cB); 4659 cC = AVERAGE(cC, cD); 4660 cE = AVERAGE(cE, cF); 4661 c0 = AVERAGE(c0, c2); 4662 c4 = AVERAGE(c4, c6); 4663 c8 = AVERAGE(c8, cA); 4664 cC = AVERAGE(cC, cE); 4665 c0 = AVERAGE(c0, c4); 4666 c8 = AVERAGE(c8, cC); 4667 c0 = AVERAGE(c0, c8); 4668 4669 *(unsigned int*)(source0 + 4 * x) = c0; 4670 } 4671 4672 source0 += pitch; 4673 source1 += pitch; 4674 source2 += pitch; 4675 source3 += pitch; 4676 source4 += pitch; 4677 source5 += pitch; 4678 source6 += pitch; 4679 source7 += pitch; 4680 source8 += pitch; 4681 source9 += pitch; 4682 sourceA += pitch; 4683 sourceB += pitch; 4684 sourceC += pitch; 4685 sourceD += pitch; 4686 sourceE += pitch; 4687 sourceF += pitch; 4688 } 4689 } 4690 else ASSERT(false); 4691 4692 #undef AVERAGE 4693 } 4694 } 4695 else if(internal.format == FORMAT_R32F) 4696 { 4697 #if defined(__i386__) || defined(__x86_64__) 4698 if(CPUID::supportsSSE() && (width % 4) == 0) 4699 { 4700 if(internal.samples == 2) 4701 { 4702 for(int y = 0; y < height; y++) 4703 { 4704 for(int x = 0; x < width; x += 4) 4705 { 4706 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4707 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4708 4709 c0 = _mm_add_ps(c0, c1); 4710 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4711 4712 _mm_store_ps((float*)(source0 + 4 * x), c0); 4713 } 4714 4715 source0 += pitch; 4716 source1 += pitch; 4717 } 4718 } 4719 else if(internal.samples == 4) 4720 { 4721 for(int y = 0; y < height; y++) 4722 { 4723 for(int x = 0; x < width; x += 4) 4724 { 4725 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4726 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4727 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4728 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4729 4730 c0 = _mm_add_ps(c0, c1); 4731 c2 = _mm_add_ps(c2, c3); 4732 c0 = _mm_add_ps(c0, c2); 4733 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4734 4735 _mm_store_ps((float*)(source0 + 4 * x), c0); 4736 } 4737 4738 source0 += pitch; 4739 source1 += pitch; 4740 source2 += pitch; 4741 source3 += pitch; 4742 } 4743 } 4744 else if(internal.samples == 8) 4745 { 4746 for(int y = 0; y < height; y++) 4747 { 4748 for(int x = 0; x < width; x += 4) 4749 { 4750 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4751 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4752 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4753 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4754 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4755 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4756 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4757 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4758 4759 c0 = _mm_add_ps(c0, c1); 4760 c2 = _mm_add_ps(c2, c3); 4761 c4 = _mm_add_ps(c4, c5); 4762 c6 = _mm_add_ps(c6, c7); 4763 c0 = _mm_add_ps(c0, c2); 4764 c4 = _mm_add_ps(c4, c6); 4765 c0 = _mm_add_ps(c0, c4); 4766 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4767 4768 _mm_store_ps((float*)(source0 + 4 * x), c0); 4769 } 4770 4771 source0 += pitch; 4772 source1 += pitch; 4773 source2 += pitch; 4774 source3 += pitch; 4775 source4 += pitch; 4776 source5 += pitch; 4777 source6 += pitch; 4778 source7 += pitch; 4779 } 4780 } 4781 else if(internal.samples == 16) 4782 { 4783 for(int y = 0; y < height; y++) 4784 { 4785 for(int x = 0; x < width; x += 4) 4786 { 4787 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4788 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4789 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4790 __m128 c3 = 
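						// Note: the floating-point resolves sum all samples first and scale by
						// 1.0f / samples once, so there is no per-step rounding as in the
						// integer _mm_avg / AVERAGE paths.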
_mm_load_ps((float*)(source3 + 4 * x)); 4791 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4792 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4793 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4794 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4795 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 4796 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 4797 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x)); 4798 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 4799 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 4800 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 4801 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 4802 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 4803 4804 c0 = _mm_add_ps(c0, c1); 4805 c2 = _mm_add_ps(c2, c3); 4806 c4 = _mm_add_ps(c4, c5); 4807 c6 = _mm_add_ps(c6, c7); 4808 c8 = _mm_add_ps(c8, c9); 4809 cA = _mm_add_ps(cA, cB); 4810 cC = _mm_add_ps(cC, cD); 4811 cE = _mm_add_ps(cE, cF); 4812 c0 = _mm_add_ps(c0, c2); 4813 c4 = _mm_add_ps(c4, c6); 4814 c8 = _mm_add_ps(c8, cA); 4815 cC = _mm_add_ps(cC, cE); 4816 c0 = _mm_add_ps(c0, c4); 4817 c8 = _mm_add_ps(c8, cC); 4818 c0 = _mm_add_ps(c0, c8); 4819 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 4820 4821 _mm_store_ps((float*)(source0 + 4 * x), c0); 4822 } 4823 4824 source0 += pitch; 4825 source1 += pitch; 4826 source2 += pitch; 4827 source3 += pitch; 4828 source4 += pitch; 4829 source5 += pitch; 4830 source6 += pitch; 4831 source7 += pitch; 4832 source8 += pitch; 4833 source9 += pitch; 4834 sourceA += pitch; 4835 sourceB += pitch; 4836 sourceC += pitch; 4837 sourceD += pitch; 4838 sourceE += pitch; 4839 sourceF += pitch; 4840 } 4841 } 4842 else ASSERT(false); 4843 } 4844 else 4845 #endif 4846 { 4847 if(internal.samples == 2) 4848 { 4849 for(int y = 0; y < height; y++) 4850 { 4851 for(int x = 0; x < width; x++) 4852 { 4853 float c0 = *(float*)(source0 + 4 * x); 4854 float c1 = *(float*)(source1 + 4 * x); 4855 4856 c0 = c0 + c1; 4857 c0 *= 1.0f / 2.0f; 4858 4859 *(float*)(source0 + 4 * x) = c0; 4860 } 4861 4862 source0 += pitch; 4863 source1 += pitch; 4864 } 4865 } 4866 else if(internal.samples == 4) 4867 { 4868 for(int y = 0; y < height; y++) 4869 { 4870 for(int x = 0; x < width; x++) 4871 { 4872 float c0 = *(float*)(source0 + 4 * x); 4873 float c1 = *(float*)(source1 + 4 * x); 4874 float c2 = *(float*)(source2 + 4 * x); 4875 float c3 = *(float*)(source3 + 4 * x); 4876 4877 c0 = c0 + c1; 4878 c2 = c2 + c3; 4879 c0 = c0 + c2; 4880 c0 *= 1.0f / 4.0f; 4881 4882 *(float*)(source0 + 4 * x) = c0; 4883 } 4884 4885 source0 += pitch; 4886 source1 += pitch; 4887 source2 += pitch; 4888 source3 += pitch; 4889 } 4890 } 4891 else if(internal.samples == 8) 4892 { 4893 for(int y = 0; y < height; y++) 4894 { 4895 for(int x = 0; x < width; x++) 4896 { 4897 float c0 = *(float*)(source0 + 4 * x); 4898 float c1 = *(float*)(source1 + 4 * x); 4899 float c2 = *(float*)(source2 + 4 * x); 4900 float c3 = *(float*)(source3 + 4 * x); 4901 float c4 = *(float*)(source4 + 4 * x); 4902 float c5 = *(float*)(source5 + 4 * x); 4903 float c6 = *(float*)(source6 + 4 * x); 4904 float c7 = *(float*)(source7 + 4 * x); 4905 4906 c0 = c0 + c1; 4907 c2 = c2 + c3; 4908 c4 = c4 + c5; 4909 c6 = c6 + c7; 4910 c0 = c0 + c2; 4911 c4 = c4 + c6; 4912 c0 = c0 + c4; 4913 c0 *= 1.0f / 8.0f; 4914 4915 *(float*)(source0 + 4 * x) = c0; 4916 } 4917 4918 source0 += pitch; 4919 source1 += pitch; 4920 source2 += pitch; 4921 source3 += pitch; 4922 source4 += pitch; 4923 source5 += pitch; 4924 source6 += pitch; 4925 source7 += pitch; 
4926 } 4927 } 4928 else if(internal.samples == 16) 4929 { 4930 for(int y = 0; y < height; y++) 4931 { 4932 for(int x = 0; x < width; x++) 4933 { 4934 float c0 = *(float*)(source0 + 4 * x); 4935 float c1 = *(float*)(source1 + 4 * x); 4936 float c2 = *(float*)(source2 + 4 * x); 4937 float c3 = *(float*)(source3 + 4 * x); 4938 float c4 = *(float*)(source4 + 4 * x); 4939 float c5 = *(float*)(source5 + 4 * x); 4940 float c6 = *(float*)(source6 + 4 * x); 4941 float c7 = *(float*)(source7 + 4 * x); 4942 float c8 = *(float*)(source8 + 4 * x); 4943 float c9 = *(float*)(source9 + 4 * x); 4944 float cA = *(float*)(sourceA + 4 * x); 4945 float cB = *(float*)(sourceB + 4 * x); 4946 float cC = *(float*)(sourceC + 4 * x); 4947 float cD = *(float*)(sourceD + 4 * x); 4948 float cE = *(float*)(sourceE + 4 * x); 4949 float cF = *(float*)(sourceF + 4 * x); 4950 4951 c0 = c0 + c1; 4952 c2 = c2 + c3; 4953 c4 = c4 + c5; 4954 c6 = c6 + c7; 4955 c8 = c8 + c9; 4956 cA = cA + cB; 4957 cC = cC + cD; 4958 cE = cE + cF; 4959 c0 = c0 + c2; 4960 c4 = c4 + c6; 4961 c8 = c8 + cA; 4962 cC = cC + cE; 4963 c0 = c0 + c4; 4964 c8 = c8 + cC; 4965 c0 = c0 + c8; 4966 c0 *= 1.0f / 16.0f; 4967 4968 *(float*)(source0 + 4 * x) = c0; 4969 } 4970 4971 source0 += pitch; 4972 source1 += pitch; 4973 source2 += pitch; 4974 source3 += pitch; 4975 source4 += pitch; 4976 source5 += pitch; 4977 source6 += pitch; 4978 source7 += pitch; 4979 source8 += pitch; 4980 source9 += pitch; 4981 sourceA += pitch; 4982 sourceB += pitch; 4983 sourceC += pitch; 4984 sourceD += pitch; 4985 sourceE += pitch; 4986 sourceF += pitch; 4987 } 4988 } 4989 else ASSERT(false); 4990 } 4991 } 4992 else if(internal.format == FORMAT_G32R32F) 4993 { 4994 #if defined(__i386__) || defined(__x86_64__) 4995 if(CPUID::supportsSSE() && (width % 2) == 0) 4996 { 4997 if(internal.samples == 2) 4998 { 4999 for(int y = 0; y < height; y++) 5000 { 5001 for(int x = 0; x < width; x += 2) 5002 { 5003 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5004 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5005 5006 c0 = _mm_add_ps(c0, c1); 5007 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5008 5009 _mm_store_ps((float*)(source0 + 8 * x), c0); 5010 } 5011 5012 source0 += pitch; 5013 source1 += pitch; 5014 } 5015 } 5016 else if(internal.samples == 4) 5017 { 5018 for(int y = 0; y < height; y++) 5019 { 5020 for(int x = 0; x < width; x += 2) 5021 { 5022 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5023 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5024 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5025 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5026 5027 c0 = _mm_add_ps(c0, c1); 5028 c2 = _mm_add_ps(c2, c3); 5029 c0 = _mm_add_ps(c0, c2); 5030 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5031 5032 _mm_store_ps((float*)(source0 + 8 * x), c0); 5033 } 5034 5035 source0 += pitch; 5036 source1 += pitch; 5037 source2 += pitch; 5038 source3 += pitch; 5039 } 5040 } 5041 else if(internal.samples == 8) 5042 { 5043 for(int y = 0; y < height; y++) 5044 { 5045 for(int x = 0; x < width; x += 2) 5046 { 5047 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5048 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5049 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5050 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5051 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5052 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5053 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5054 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5055 5056 c0 = 
_mm_add_ps(c0, c1); 5057 c2 = _mm_add_ps(c2, c3); 5058 c4 = _mm_add_ps(c4, c5); 5059 c6 = _mm_add_ps(c6, c7); 5060 c0 = _mm_add_ps(c0, c2); 5061 c4 = _mm_add_ps(c4, c6); 5062 c0 = _mm_add_ps(c0, c4); 5063 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5064 5065 _mm_store_ps((float*)(source0 + 8 * x), c0); 5066 } 5067 5068 source0 += pitch; 5069 source1 += pitch; 5070 source2 += pitch; 5071 source3 += pitch; 5072 source4 += pitch; 5073 source5 += pitch; 5074 source6 += pitch; 5075 source7 += pitch; 5076 } 5077 } 5078 else if(internal.samples == 16) 5079 { 5080 for(int y = 0; y < height; y++) 5081 { 5082 for(int x = 0; x < width; x += 2) 5083 { 5084 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5085 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5086 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5087 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5088 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5089 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5090 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5091 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5092 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x)); 5093 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 5094 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 5095 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x)); 5096 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 5097 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 5098 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 5099 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 5100 5101 c0 = _mm_add_ps(c0, c1); 5102 c2 = _mm_add_ps(c2, c3); 5103 c4 = _mm_add_ps(c4, c5); 5104 c6 = _mm_add_ps(c6, c7); 5105 c8 = _mm_add_ps(c8, c9); 5106 cA = _mm_add_ps(cA, cB); 5107 cC = _mm_add_ps(cC, cD); 5108 cE = _mm_add_ps(cE, cF); 5109 c0 = _mm_add_ps(c0, c2); 5110 c4 = _mm_add_ps(c4, c6); 5111 c8 = _mm_add_ps(c8, cA); 5112 cC = _mm_add_ps(cC, cE); 5113 c0 = _mm_add_ps(c0, c4); 5114 c8 = _mm_add_ps(c8, cC); 5115 c0 = _mm_add_ps(c0, c8); 5116 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5117 5118 _mm_store_ps((float*)(source0 + 8 * x), c0); 5119 } 5120 5121 source0 += pitch; 5122 source1 += pitch; 5123 source2 += pitch; 5124 source3 += pitch; 5125 source4 += pitch; 5126 source5 += pitch; 5127 source6 += pitch; 5128 source7 += pitch; 5129 source8 += pitch; 5130 source9 += pitch; 5131 sourceA += pitch; 5132 sourceB += pitch; 5133 sourceC += pitch; 5134 sourceD += pitch; 5135 sourceE += pitch; 5136 sourceF += pitch; 5137 } 5138 } 5139 else ASSERT(false); 5140 } 5141 else 5142 #endif 5143 { 5144 if(internal.samples == 2) 5145 { 5146 for(int y = 0; y < height; y++) 5147 { 5148 for(int x = 0; x < 2 * width; x++) 5149 { 5150 float c0 = *(float*)(source0 + 4 * x); 5151 float c1 = *(float*)(source1 + 4 * x); 5152 5153 c0 = c0 + c1; 5154 c0 *= 1.0f / 2.0f; 5155 5156 *(float*)(source0 + 4 * x) = c0; 5157 } 5158 5159 source0 += pitch; 5160 source1 += pitch; 5161 } 5162 } 5163 else if(internal.samples == 4) 5164 { 5165 for(int y = 0; y < height; y++) 5166 { 5167 for(int x = 0; x < 2 * width; x++) 5168 { 5169 float c0 = *(float*)(source0 + 4 * x); 5170 float c1 = *(float*)(source1 + 4 * x); 5171 float c2 = *(float*)(source2 + 4 * x); 5172 float c3 = *(float*)(source3 + 4 * x); 5173 5174 c0 = c0 + c1; 5175 c2 = c2 + c3; 5176 c0 = c0 + c2; 5177 c0 *= 1.0f / 4.0f; 5178 5179 *(float*)(source0 + 4 * x) = c0; 5180 } 5181 5182 source0 += pitch; 5183 source1 += pitch; 5184 source2 += pitch; 5185 source3 += pitch; 5186 } 5187 } 5188 else if(internal.samples == 8) 
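					// G32R32F pixels are two floats (8 bytes), so these scalar loops walk
					// 2 * width floats per row and resolve each channel independently.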
5189 { 5190 for(int y = 0; y < height; y++) 5191 { 5192 for(int x = 0; x < 2 * width; x++) 5193 { 5194 float c0 = *(float*)(source0 + 4 * x); 5195 float c1 = *(float*)(source1 + 4 * x); 5196 float c2 = *(float*)(source2 + 4 * x); 5197 float c3 = *(float*)(source3 + 4 * x); 5198 float c4 = *(float*)(source4 + 4 * x); 5199 float c5 = *(float*)(source5 + 4 * x); 5200 float c6 = *(float*)(source6 + 4 * x); 5201 float c7 = *(float*)(source7 + 4 * x); 5202 5203 c0 = c0 + c1; 5204 c2 = c2 + c3; 5205 c4 = c4 + c5; 5206 c6 = c6 + c7; 5207 c0 = c0 + c2; 5208 c4 = c4 + c6; 5209 c0 = c0 + c4; 5210 c0 *= 1.0f / 8.0f; 5211 5212 *(float*)(source0 + 4 * x) = c0; 5213 } 5214 5215 source0 += pitch; 5216 source1 += pitch; 5217 source2 += pitch; 5218 source3 += pitch; 5219 source4 += pitch; 5220 source5 += pitch; 5221 source6 += pitch; 5222 source7 += pitch; 5223 } 5224 } 5225 else if(internal.samples == 16) 5226 { 5227 for(int y = 0; y < height; y++) 5228 { 5229 for(int x = 0; x < 2 * width; x++) 5230 { 5231 float c0 = *(float*)(source0 + 4 * x); 5232 float c1 = *(float*)(source1 + 4 * x); 5233 float c2 = *(float*)(source2 + 4 * x); 5234 float c3 = *(float*)(source3 + 4 * x); 5235 float c4 = *(float*)(source4 + 4 * x); 5236 float c5 = *(float*)(source5 + 4 * x); 5237 float c6 = *(float*)(source6 + 4 * x); 5238 float c7 = *(float*)(source7 + 4 * x); 5239 float c8 = *(float*)(source8 + 4 * x); 5240 float c9 = *(float*)(source9 + 4 * x); 5241 float cA = *(float*)(sourceA + 4 * x); 5242 float cB = *(float*)(sourceB + 4 * x); 5243 float cC = *(float*)(sourceC + 4 * x); 5244 float cD = *(float*)(sourceD + 4 * x); 5245 float cE = *(float*)(sourceE + 4 * x); 5246 float cF = *(float*)(sourceF + 4 * x); 5247 5248 c0 = c0 + c1; 5249 c2 = c2 + c3; 5250 c4 = c4 + c5; 5251 c6 = c6 + c7; 5252 c8 = c8 + c9; 5253 cA = cA + cB; 5254 cC = cC + cD; 5255 cE = cE + cF; 5256 c0 = c0 + c2; 5257 c4 = c4 + c6; 5258 c8 = c8 + cA; 5259 cC = cC + cE; 5260 c0 = c0 + c4; 5261 c8 = c8 + cC; 5262 c0 = c0 + c8; 5263 c0 *= 1.0f / 16.0f; 5264 5265 *(float*)(source0 + 4 * x) = c0; 5266 } 5267 5268 source0 += pitch; 5269 source1 += pitch; 5270 source2 += pitch; 5271 source3 += pitch; 5272 source4 += pitch; 5273 source5 += pitch; 5274 source6 += pitch; 5275 source7 += pitch; 5276 source8 += pitch; 5277 source9 += pitch; 5278 sourceA += pitch; 5279 sourceB += pitch; 5280 sourceC += pitch; 5281 sourceD += pitch; 5282 sourceE += pitch; 5283 sourceF += pitch; 5284 } 5285 } 5286 else ASSERT(false); 5287 } 5288 } 5289 else if(internal.format == FORMAT_A32B32G32R32F || 5290 internal.format == FORMAT_X32B32G32R32F || 5291 internal.format == FORMAT_X32B32G32R32F_UNSIGNED) 5292 { 5293 #if defined(__i386__) || defined(__x86_64__) 5294 if(CPUID::supportsSSE()) 5295 { 5296 if(internal.samples == 2) 5297 { 5298 for(int y = 0; y < height; y++) 5299 { 5300 for(int x = 0; x < width; x++) 5301 { 5302 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5303 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5304 5305 c0 = _mm_add_ps(c0, c1); 5306 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5307 5308 _mm_store_ps((float*)(source0 + 16 * x), c0); 5309 } 5310 5311 source0 += pitch; 5312 source1 += pitch; 5313 } 5314 } 5315 else if(internal.samples == 4) 5316 { 5317 for(int y = 0; y < height; y++) 5318 { 5319 for(int x = 0; x < width; x++) 5320 { 5321 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5322 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5323 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5324 __m128 c3 = _mm_load_ps((float*)(source3 
+ 16 * x)); 5325 5326 c0 = _mm_add_ps(c0, c1); 5327 c2 = _mm_add_ps(c2, c3); 5328 c0 = _mm_add_ps(c0, c2); 5329 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5330 5331 _mm_store_ps((float*)(source0 + 16 * x), c0); 5332 } 5333 5334 source0 += pitch; 5335 source1 += pitch; 5336 source2 += pitch; 5337 source3 += pitch; 5338 } 5339 } 5340 else if(internal.samples == 8) 5341 { 5342 for(int y = 0; y < height; y++) 5343 { 5344 for(int x = 0; x < width; x++) 5345 { 5346 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5347 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5348 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5349 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5350 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5351 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5352 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5353 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5354 5355 c0 = _mm_add_ps(c0, c1); 5356 c2 = _mm_add_ps(c2, c3); 5357 c4 = _mm_add_ps(c4, c5); 5358 c6 = _mm_add_ps(c6, c7); 5359 c0 = _mm_add_ps(c0, c2); 5360 c4 = _mm_add_ps(c4, c6); 5361 c0 = _mm_add_ps(c0, c4); 5362 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5363 5364 _mm_store_ps((float*)(source0 + 16 * x), c0); 5365 } 5366 5367 source0 += pitch; 5368 source1 += pitch; 5369 source2 += pitch; 5370 source3 += pitch; 5371 source4 += pitch; 5372 source5 += pitch; 5373 source6 += pitch; 5374 source7 += pitch; 5375 } 5376 } 5377 else if(internal.samples == 16) 5378 { 5379 for(int y = 0; y < height; y++) 5380 { 5381 for(int x = 0; x < width; x++) 5382 { 5383 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5384 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5385 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5386 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5387 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5388 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5389 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5390 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5391 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5392 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 5393 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5394 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x)); 5395 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x)); 5396 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x)); 5397 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x)); 5398 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x)); 5399 5400 c0 = _mm_add_ps(c0, c1); 5401 c2 = _mm_add_ps(c2, c3); 5402 c4 = _mm_add_ps(c4, c5); 5403 c6 = _mm_add_ps(c6, c7); 5404 c8 = _mm_add_ps(c8, c9); 5405 cA = _mm_add_ps(cA, cB); 5406 cC = _mm_add_ps(cC, cD); 5407 cE = _mm_add_ps(cE, cF); 5408 c0 = _mm_add_ps(c0, c2); 5409 c4 = _mm_add_ps(c4, c6); 5410 c8 = _mm_add_ps(c8, cA); 5411 cC = _mm_add_ps(cC, cE); 5412 c0 = _mm_add_ps(c0, c4); 5413 c8 = _mm_add_ps(c8, cC); 5414 c0 = _mm_add_ps(c0, c8); 5415 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5416 5417 _mm_store_ps((float*)(source0 + 16 * x), c0); 5418 } 5419 5420 source0 += pitch; 5421 source1 += pitch; 5422 source2 += pitch; 5423 source3 += pitch; 5424 source4 += pitch; 5425 source5 += pitch; 5426 source6 += pitch; 5427 source7 += pitch; 5428 source8 += pitch; 5429 source9 += pitch; 5430 sourceA += pitch; 5431 sourceB += pitch; 5432 sourceC += pitch; 5433 sourceD += pitch; 5434 sourceE += pitch; 5435 sourceF += pitch; 5436 } 5437 } 5438 else ASSERT(false); 5439 } 5440 else 5441 #endif 5442 { 5443 
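					// Scalar fallback for the four-channel float formats: each pixel is 16 bytes,
					// so every row is resolved as 4 * width independent floats.
					//
					// Illustrative sketch only (not part of this file), with placeholder names
					// (floatsPerRow, samples, source, slice) — conceptually each float resolve
					// below reduces to:
					//
					//     for(int i = 0; i < floatsPerRow; i++)
					//     {
					//         float sum = 0.0f;
					//         for(int s = 0; s < samples; s++)
					//         {
					//             sum += ((float*)(source + s * slice))[i];
					//         }
					//         ((float*)source)[i] = sum / samples;
					//     }
					//
					// The unrolled loops below follow the same pairwise summation order before
					// the final scale.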
if(internal.samples == 2) 5444 { 5445 for(int y = 0; y < height; y++) 5446 { 5447 for(int x = 0; x < 4 * width; x++) 5448 { 5449 float c0 = *(float*)(source0 + 4 * x); 5450 float c1 = *(float*)(source1 + 4 * x); 5451 5452 c0 = c0 + c1; 5453 c0 *= 1.0f / 2.0f; 5454 5455 *(float*)(source0 + 4 * x) = c0; 5456 } 5457 5458 source0 += pitch; 5459 source1 += pitch; 5460 } 5461 } 5462 else if(internal.samples == 4) 5463 { 5464 for(int y = 0; y < height; y++) 5465 { 5466 for(int x = 0; x < 4 * width; x++) 5467 { 5468 float c0 = *(float*)(source0 + 4 * x); 5469 float c1 = *(float*)(source1 + 4 * x); 5470 float c2 = *(float*)(source2 + 4 * x); 5471 float c3 = *(float*)(source3 + 4 * x); 5472 5473 c0 = c0 + c1; 5474 c2 = c2 + c3; 5475 c0 = c0 + c2; 5476 c0 *= 1.0f / 4.0f; 5477 5478 *(float*)(source0 + 4 * x) = c0; 5479 } 5480 5481 source0 += pitch; 5482 source1 += pitch; 5483 source2 += pitch; 5484 source3 += pitch; 5485 } 5486 } 5487 else if(internal.samples == 8) 5488 { 5489 for(int y = 0; y < height; y++) 5490 { 5491 for(int x = 0; x < 4 * width; x++) 5492 { 5493 float c0 = *(float*)(source0 + 4 * x); 5494 float c1 = *(float*)(source1 + 4 * x); 5495 float c2 = *(float*)(source2 + 4 * x); 5496 float c3 = *(float*)(source3 + 4 * x); 5497 float c4 = *(float*)(source4 + 4 * x); 5498 float c5 = *(float*)(source5 + 4 * x); 5499 float c6 = *(float*)(source6 + 4 * x); 5500 float c7 = *(float*)(source7 + 4 * x); 5501 5502 c0 = c0 + c1; 5503 c2 = c2 + c3; 5504 c4 = c4 + c5; 5505 c6 = c6 + c7; 5506 c0 = c0 + c2; 5507 c4 = c4 + c6; 5508 c0 = c0 + c4; 5509 c0 *= 1.0f / 8.0f; 5510 5511 *(float*)(source0 + 4 * x) = c0; 5512 } 5513 5514 source0 += pitch; 5515 source1 += pitch; 5516 source2 += pitch; 5517 source3 += pitch; 5518 source4 += pitch; 5519 source5 += pitch; 5520 source6 += pitch; 5521 source7 += pitch; 5522 } 5523 } 5524 else if(internal.samples == 16) 5525 { 5526 for(int y = 0; y < height; y++) 5527 { 5528 for(int x = 0; x < 4 * width; x++) 5529 { 5530 float c0 = *(float*)(source0 + 4 * x); 5531 float c1 = *(float*)(source1 + 4 * x); 5532 float c2 = *(float*)(source2 + 4 * x); 5533 float c3 = *(float*)(source3 + 4 * x); 5534 float c4 = *(float*)(source4 + 4 * x); 5535 float c5 = *(float*)(source5 + 4 * x); 5536 float c6 = *(float*)(source6 + 4 * x); 5537 float c7 = *(float*)(source7 + 4 * x); 5538 float c8 = *(float*)(source8 + 4 * x); 5539 float c9 = *(float*)(source9 + 4 * x); 5540 float cA = *(float*)(sourceA + 4 * x); 5541 float cB = *(float*)(sourceB + 4 * x); 5542 float cC = *(float*)(sourceC + 4 * x); 5543 float cD = *(float*)(sourceD + 4 * x); 5544 float cE = *(float*)(sourceE + 4 * x); 5545 float cF = *(float*)(sourceF + 4 * x); 5546 5547 c0 = c0 + c1; 5548 c2 = c2 + c3; 5549 c4 = c4 + c5; 5550 c6 = c6 + c7; 5551 c8 = c8 + c9; 5552 cA = cA + cB; 5553 cC = cC + cD; 5554 cE = cE + cF; 5555 c0 = c0 + c2; 5556 c4 = c4 + c6; 5557 c8 = c8 + cA; 5558 cC = cC + cE; 5559 c0 = c0 + c4; 5560 c8 = c8 + cC; 5561 c0 = c0 + c8; 5562 c0 *= 1.0f / 16.0f; 5563 5564 *(float*)(source0 + 4 * x) = c0; 5565 } 5566 5567 source0 += pitch; 5568 source1 += pitch; 5569 source2 += pitch; 5570 source3 += pitch; 5571 source4 += pitch; 5572 source5 += pitch; 5573 source6 += pitch; 5574 source7 += pitch; 5575 source8 += pitch; 5576 source9 += pitch; 5577 sourceA += pitch; 5578 sourceB += pitch; 5579 sourceC += pitch; 5580 sourceD += pitch; 5581 sourceE += pitch; 5582 sourceF += pitch; 5583 } 5584 } 5585 else ASSERT(false); 5586 } 5587 } 5588 else if(internal.format == FORMAT_R5G6B5) 5589 { 5590 #if defined(__i386__) || 
defined(__x86_64__) 5591 if(CPUID::supportsSSE2() && (width % 8) == 0) 5592 { 5593 if(internal.samples == 2) 5594 { 5595 for(int y = 0; y < height; y++) 5596 { 5597 for(int x = 0; x < width; x += 8) 5598 { 5599 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5600 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5601 5602 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5603 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5604 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5605 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5606 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5607 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5608 5609 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5610 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5611 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5612 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5613 c0 = _mm_or_si128(c0, c1); 5614 5615 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5616 } 5617 5618 source0 += pitch; 5619 source1 += pitch; 5620 } 5621 } 5622 else if(internal.samples == 4) 5623 { 5624 for(int y = 0; y < height; y++) 5625 { 5626 for(int x = 0; x < width; x += 8) 5627 { 5628 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5629 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5630 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5631 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5632 5633 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5634 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5635 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5636 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5637 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5638 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5639 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5640 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5641 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5642 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5643 5644 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5645 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5646 c0 = _mm_avg_epu8(c0, c2); 5647 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5648 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5649 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5650 c1 = _mm_avg_epu16(c1, c3); 5651 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5652 c0 = _mm_or_si128(c0, c1); 5653 5654 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5655 } 5656 5657 source0 += pitch; 5658 source1 += pitch; 5659 source2 += pitch; 5660 source3 += pitch; 5661 } 5662 } 5663 else if(internal.samples == 8) 5664 { 5665 for(int y = 0; y < height; y++) 5666 { 5667 for(int x = 0; x < width; x += 8) 5668 { 5669 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5670 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5671 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5672 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5673 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5674 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 
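						// As in the 2x/4x cases above: red and blue (mask 0xF81F) sit in separate
						// bytes of each 16-bit R5G6B5 pixel, so they can be averaged with
						// _mm_avg_epu8, while green (mask 0x07E0) straddles the byte boundary and
						// is averaged with _mm_avg_epu16; the two results are re-masked and OR'd
						// back together.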
5675 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5676 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5677 5678 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5679 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5680 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5681 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5682 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5683 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5684 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5685 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5686 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5687 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5688 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5689 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5690 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5691 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5692 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5693 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5694 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5695 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5696 5697 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5698 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5699 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5700 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5701 c0 = _mm_avg_epu8(c0, c2); 5702 c4 = _mm_avg_epu8(c4, c6); 5703 c0 = _mm_avg_epu8(c0, c4); 5704 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5705 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5706 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5707 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5708 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5709 c1 = _mm_avg_epu16(c1, c3); 5710 c5 = _mm_avg_epu16(c5, c7); 5711 c1 = _mm_avg_epu16(c1, c5); 5712 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5713 c0 = _mm_or_si128(c0, c1); 5714 5715 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5716 } 5717 5718 source0 += pitch; 5719 source1 += pitch; 5720 source2 += pitch; 5721 source3 += pitch; 5722 source4 += pitch; 5723 source5 += pitch; 5724 source6 += pitch; 5725 source7 += pitch; 5726 } 5727 } 5728 else if(internal.samples == 16) 5729 { 5730 for(int y = 0; y < height; y++) 5731 { 5732 for(int x = 0; x < width; x += 8) 5733 { 5734 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5735 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5736 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5737 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5738 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5739 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5740 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5741 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5742 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x)); 5743 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x)); 5744 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x)); 5745 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x)); 5746 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x)); 5747 __m128i cD = 
							__m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
							__m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));

							static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
							static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
							__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
							__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
							__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
							__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
							__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
							__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
							__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
							__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
							__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
							__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
							__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
							__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
							__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
							__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
							__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
							__m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
							__m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
							__m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
							__m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
							__m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
							__m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
							__m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
							__m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
							__m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
							__m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
							__m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
							__m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
							__m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
							__m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
							__m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
							__m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));

							c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
							c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
							c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
							c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
							c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
							cA = _mm_avg_epu8(cA_r_b, cB_r_b);
							cC = _mm_avg_epu8(cC_r_b, cD_r_b);
							cE = _mm_avg_epu8(cE_r_b, cF_r_b);
							c0 = _mm_avg_epu8(c0, c2);
							c4 = _mm_avg_epu8(c4, c6);
							c8 = _mm_avg_epu8(c8, cA);
							cC = _mm_avg_epu8(cC, cE);
							c0 = _mm_avg_epu8(c0, c4);
							c8 = _mm_avg_epu8(c8, cC);
							c0 = _mm_avg_epu8(c0, c8);
							c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
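							// Green (bits 5-10) straddles the byte boundary of each 16-bit pixel,
							// so the reduction below averages at 16-bit granularity.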
							c1 = _mm_avg_epu16(c0__g_, c1__g_);
							c3 = _mm_avg_epu16(c2__g_, c3__g_);
							c5 = _mm_avg_epu16(c4__g_, c5__g_);
							c7 = _mm_avg_epu16(c6__g_, c7__g_);
							c9 = _mm_avg_epu16(c8__g_, c9__g_);
							cB = _mm_avg_epu16(cA__g_, cB__g_);
							cD = _mm_avg_epu16(cC__g_, cD__g_);
							cF = _mm_avg_epu16(cE__g_, cF__g_);
							c1 = _mm_avg_epu16(c1, c3);
							c5 = _mm_avg_epu16(c5, c7);
							c9 = _mm_avg_epu16(c9, cB);
							cD = _mm_avg_epu16(cD, cF);
							c1 = _mm_avg_epu16(c1, c5);
							c9 = _mm_avg_epu16(c9, cD);
							c1 = _mm_avg_epu16(c1, c9);
							c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
							c0 = _mm_or_si128(c0, c1);

							_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
						source8 += pitch;
						source9 += pitch;
						sourceA += pitch;
						sourceB += pitch;
						sourceC += pitch;
						sourceD += pitch;
						sourceE += pitch;
						sourceF += pitch;
					}
				}
				else ASSERT(false);
			}
			else
#endif
			{
				// Scalar fallback: AVERAGE computes the per-channel average of two R5G6B5
				// pixels, rounding up, without unpacking the 5/6/5 fields.
				#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))

				if(internal.samples == 2)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);

							c0 = AVERAGE(c0, c1);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
					}
				}
				else if(internal.samples == 4)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c0 = AVERAGE(c0, c2);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
					}
				}
				else if(internal.samples == 8)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
							unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
							unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
							unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
							unsigned short c7 = *(unsigned short*)(source7 + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c4 = AVERAGE(c4, c5);
							c6 = AVERAGE(c6, c7);
							c0 = AVERAGE(c0, c2);
							c4 = AVERAGE(c4, c6);
							c0 = AVERAGE(c0, c4);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
					}
				}
				else if(internal.samples == 16)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
							unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
							unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
							unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
							unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
							unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
							unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
							unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
							unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
							unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
							unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
							unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
							unsigned short cF = *(unsigned short*)(sourceF + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c4 = AVERAGE(c4, c5);
							c6 = AVERAGE(c6, c7);
							c8 = AVERAGE(c8, c9);
							cA = AVERAGE(cA, cB);
							cC = AVERAGE(cC, cD);
							cE = AVERAGE(cE, cF);
							c0 = AVERAGE(c0, c2);
							c4 = AVERAGE(c4, c6);
							c8 = AVERAGE(c8, cA);
							cC = AVERAGE(cC, cE);
							c0 = AVERAGE(c0, c4);
							c8 = AVERAGE(c8, cC);
							c0 = AVERAGE(c0, c8);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
						source8 += pitch;
						source9 += pitch;
						sourceA += pitch;
						sourceB += pitch;
						sourceC += pitch;
						sourceD += pitch;
						sourceE += pitch;
						sourceF += pitch;
					}
				}
				else ASSERT(false);

				#undef AVERAGE
			}
		}
		else
		{
			// UNIMPLEMENTED();
		}
	}
}
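
// Note (illustrative only, not part of SwiftShader's API): the AVERAGE macro used in the
// scalar resolve path above is the usual carry-aware trick for averaging packed R5G6B5
// pixels per channel, rounding up like _mm_avg_epu16. A hypothetical standalone helper
// showing the same computation might look like this:
//
//   unsigned short averageR5G6B5(unsigned short x, unsigned short y)
//   {
//       // Shared bits, plus half of the differing bits (0x7BEF clears the bits that
//       // would cross the blue/green and green/red field boundaries after the shift),
//       // plus a per-channel round-up term (0x0821 is the LSB of each 5/6/5 field).
//       return (x & y) + (((x ^ y) >> 1) & 0x7BEF) + ((x ^ y) & 0x0821);
//   }
//
//   // Example: averageR5G6B5(0xF800, 0x0000) == 0x8000, i.e. red 31 and 0 average to 16.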