// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Surface.hpp"

#include "Color.hpp"
#include "Context.hpp"
#include "ETC_Decoder.hpp"
#include "Renderer.hpp"
#include "Common/Half.hpp"
#include "Common/Memory.hpp"
#include "Common/CPUID.hpp"
#include "Common/Resource.hpp"
#include "Common/Debug.hpp"
#include "Reactor/Reactor.hpp"

#if defined(__i386__) || defined(__x86_64__)
	#include <xmmintrin.h>
	#include <emmintrin.h>
#endif

#undef min
#undef max

namespace sw
{
	extern bool quadLayoutEnabled;
	extern bool complementaryDepthBuffer;
	extern TranscendentalPrecision logPrecision;

	unsigned int *Surface::palette = 0;
	unsigned int Surface::paletteID = 0;

	void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));
		ASSERT((z >= 0) && (z < depth));

		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;

		for(int i = 0; i < samples; i++)
		{
			write(element, color);
			element += sliceB;
		}
	}

	void Surface::Buffer::write(int x, int y, const Color<float> &color)
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));

		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;

		for(int i = 0; i < samples; i++)
		{
			write(element, color);
			element += sliceB;
		}
	}

	inline void Surface::Buffer::write(void *element, const Color<float> &color)
	{
		float r = color.r;
		float g = color.g;
		float b = color.b;
		float a = color.a;

		if(isSRGBformat(format))
		{
			r = linearToSRGB(r);
			g = linearToSRGB(g);
			b = linearToSRGB(b);
		}

		switch(format)
		{
		case FORMAT_A8:
			*(unsigned char*)element = unorm<8>(a);
			break;
		case FORMAT_R8_SNORM:
			*(char*)element = snorm<8>(r);
			break;
		case FORMAT_R8:
			*(unsigned char*)element = unorm<8>(r);
			break;
		case FORMAT_R8I:
			*(char*)element = scast<8>(r);
			break;
		case FORMAT_R8UI:
			*(unsigned char*)element = ucast<8>(r);
			break;
		case FORMAT_R16I:
			*(short*)element = scast<16>(r);
			break;
		case FORMAT_R16UI:
			*(unsigned short*)element = ucast<16>(r);
			break;
		case FORMAT_R32I:
			*(int*)element = static_cast<int>(r);
			break;
		case FORMAT_R32UI:
			*(unsigned int*)element = static_cast<unsigned int>(r);
			break;
		case FORMAT_R3G3B2:
			*(unsigned char*)element = (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
			break;
		case FORMAT_A8R3G3B2:
			*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<3>(r) << 5) | (unorm<3>(g) << 2) |
			                            (unorm<2>(b) << 0);
			break;
		case FORMAT_X4R4G4B4:
			*(unsigned short*)element = 0xF000 | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
			break;
		case FORMAT_A4R4G4B4:
			*(unsigned short*)element = (unorm<4>(a) << 12) | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
			break;
		case FORMAT_R4G4B4A4:
			*(unsigned short*)element = (unorm<4>(r) << 12) | (unorm<4>(g) << 8) | (unorm<4>(b) << 4) | (unorm<4>(a) << 0);
			break;
		case FORMAT_R5G6B5:
			*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<6>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_A1R5G5B5:
			*(unsigned short*)element = (unorm<1>(a) << 15) | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_R5G5B5A1:
			*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<5>(g) << 6) | (unorm<5>(b) << 1) | (unorm<5>(a) << 0);
			break;
		case FORMAT_X1R5G5B5:
			*(unsigned short*)element = 0x8000 | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_A8R8G8B8:
			*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
			break;
		case FORMAT_X8R8G8B8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
			break;
		case FORMAT_A8B8G8R8_SNORM:
			*(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(a)) << 24) |
			                          (static_cast<unsigned int>(snorm<8>(b)) << 16) |
			                          (static_cast<unsigned int>(snorm<8>(g)) << 8) |
			                          (static_cast<unsigned int>(snorm<8>(r)) << 0);
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_SRGB8_A8:
			*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_A8B8G8R8I:
			*(unsigned int*)element = (static_cast<unsigned int>(scast<8>(a)) << 24) |
			                          (static_cast<unsigned int>(scast<8>(b)) << 16) |
			                          (static_cast<unsigned int>(scast<8>(g)) << 8) |
			                          (static_cast<unsigned int>(scast<8>(r)) << 0);
			break;
		case FORMAT_A8B8G8R8UI:
			*(unsigned int*)element = (ucast<8>(a) << 24) | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_X8B8G8R8_SNORM:
			*(unsigned int*)element = 0x7F000000 |
			                          (static_cast<unsigned int>(snorm<8>(b)) << 16) |
			                          (static_cast<unsigned int>(snorm<8>(g)) << 8) |
			                          (static_cast<unsigned int>(snorm<8>(r)) << 0);
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_SRGB8_X8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_X8B8G8R8I:
			*(unsigned int*)element = 0x7F000000 |
			                          (static_cast<unsigned int>(scast<8>(b)) << 16) |
			                          (static_cast<unsigned int>(scast<8>(g)) << 8) |
			                          (static_cast<unsigned int>(scast<8>(r)) << 0);
			break;
		case FORMAT_X8B8G8R8UI:
			*(unsigned int*)element = 0xFF000000 | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_A2R10G10B10:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(r) << 20) | (unorm<10>(g) << 10) | (unorm<10>(b) << 0);
			break;
		case FORMAT_A2B10G10R10:
		case FORMAT_A2B10G10R10UI:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(b) << 20) | (unorm<10>(g) << 10) | (unorm<10>(r) << 0);
			break;
		case FORMAT_G8R8_SNORM:
			*(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(g)) << 8) |
			                            (static_cast<unsigned short>(snorm<8>(r)) << 0);
			break;
		case
FORMAT_G8R8: 199 *(unsigned short*)element = (unorm<8>(g) << 8) | (unorm<8>(r) << 0); 200 break; 201 case FORMAT_G8R8I: 202 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(g)) << 8) | 203 (static_cast<unsigned short>(scast<8>(r)) << 0); 204 break; 205 case FORMAT_G8R8UI: 206 *(unsigned short*)element = (ucast<8>(g) << 8) | (ucast<8>(r) << 0); 207 break; 208 case FORMAT_G16R16: 209 *(unsigned int*)element = (unorm<16>(g) << 16) | (unorm<16>(r) << 0); 210 break; 211 case FORMAT_G16R16I: 212 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(g)) << 16) | 213 (static_cast<unsigned int>(scast<16>(r)) << 0); 214 break; 215 case FORMAT_G16R16UI: 216 *(unsigned int*)element = (ucast<16>(g) << 16) | (ucast<16>(r) << 0); 217 break; 218 case FORMAT_G32R32I: 219 case FORMAT_G32R32UI: 220 ((unsigned int*)element)[0] = static_cast<unsigned int>(r); 221 ((unsigned int*)element)[1] = static_cast<unsigned int>(g); 222 break; 223 case FORMAT_A16B16G16R16: 224 ((unsigned short*)element)[0] = unorm<16>(r); 225 ((unsigned short*)element)[1] = unorm<16>(g); 226 ((unsigned short*)element)[2] = unorm<16>(b); 227 ((unsigned short*)element)[3] = unorm<16>(a); 228 break; 229 case FORMAT_A16B16G16R16I: 230 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r)); 231 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g)); 232 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b)); 233 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(a)); 234 break; 235 case FORMAT_A16B16G16R16UI: 236 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r)); 237 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g)); 238 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b)); 239 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(a)); 240 break; 241 case FORMAT_X16B16G16R16I: 242 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r)); 243 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g)); 244 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b)); 245 break; 246 case FORMAT_X16B16G16R16UI: 247 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r)); 248 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g)); 249 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b)); 250 break; 251 case FORMAT_A32B32G32R32I: 252 case FORMAT_A32B32G32R32UI: 253 ((unsigned int*)element)[0] = static_cast<unsigned int>(r); 254 ((unsigned int*)element)[1] = static_cast<unsigned int>(g); 255 ((unsigned int*)element)[2] = static_cast<unsigned int>(b); 256 ((unsigned int*)element)[3] = static_cast<unsigned int>(a); 257 break; 258 case FORMAT_X32B32G32R32I: 259 case FORMAT_X32B32G32R32UI: 260 ((unsigned int*)element)[0] = static_cast<unsigned int>(r); 261 ((unsigned int*)element)[1] = static_cast<unsigned int>(g); 262 ((unsigned int*)element)[2] = static_cast<unsigned int>(b); 263 break; 264 case FORMAT_V8U8: 265 *(unsigned short*)element = (snorm<8>(g) << 8) | (snorm<8>(r) << 0); 266 break; 267 case FORMAT_L6V5U5: 268 *(unsigned short*)element = (unorm<6>(b) << 10) | (snorm<5>(g) << 5) | (snorm<5>(r) << 0); 269 break; 270 case FORMAT_Q8W8V8U8: 271 *(unsigned int*)element = (snorm<8>(a) << 24) | (snorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0); 272 break; 273 case FORMAT_X8L8V8U8: 274 *(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | 
(snorm<8>(g) << 8) | (snorm<8>(r) << 0); 275 break; 276 case FORMAT_V16U16: 277 *(unsigned int*)element = (snorm<16>(g) << 16) | (snorm<16>(r) << 0); 278 break; 279 case FORMAT_A2W10V10U10: 280 *(unsigned int*)element = (unorm<2>(a) << 30) | (snorm<10>(b) << 20) | (snorm<10>(g) << 10) | (snorm<10>(r) << 0); 281 break; 282 case FORMAT_A16W16V16U16: 283 ((unsigned short*)element)[0] = snorm<16>(r); 284 ((unsigned short*)element)[1] = snorm<16>(g); 285 ((unsigned short*)element)[2] = snorm<16>(b); 286 ((unsigned short*)element)[3] = unorm<16>(a); 287 break; 288 case FORMAT_Q16W16V16U16: 289 ((unsigned short*)element)[0] = snorm<16>(r); 290 ((unsigned short*)element)[1] = snorm<16>(g); 291 ((unsigned short*)element)[2] = snorm<16>(b); 292 ((unsigned short*)element)[3] = snorm<16>(a); 293 break; 294 case FORMAT_R8G8B8: 295 ((unsigned char*)element)[0] = unorm<8>(b); 296 ((unsigned char*)element)[1] = unorm<8>(g); 297 ((unsigned char*)element)[2] = unorm<8>(r); 298 break; 299 case FORMAT_B8G8R8: 300 ((unsigned char*)element)[0] = unorm<8>(r); 301 ((unsigned char*)element)[1] = unorm<8>(g); 302 ((unsigned char*)element)[2] = unorm<8>(b); 303 break; 304 case FORMAT_R16F: 305 *(half*)element = (half)r; 306 break; 307 case FORMAT_A16F: 308 *(half*)element = (half)a; 309 break; 310 case FORMAT_G16R16F: 311 ((half*)element)[0] = (half)r; 312 ((half*)element)[1] = (half)g; 313 break; 314 case FORMAT_X16B16G16R16F_UNSIGNED: 315 r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f); 316 // Fall through to FORMAT_X16B16G16R16F. 317 case FORMAT_X16B16G16R16F: 318 ((half*)element)[3] = 1.0f; 319 // Fall through to FORMAT_B16G16R16F. 320 case FORMAT_B16G16R16F: 321 ((half*)element)[0] = (half)r; 322 ((half*)element)[1] = (half)g; 323 ((half*)element)[2] = (half)b; 324 break; 325 case FORMAT_A16B16G16R16F: 326 ((half*)element)[0] = (half)r; 327 ((half*)element)[1] = (half)g; 328 ((half*)element)[2] = (half)b; 329 ((half*)element)[3] = (half)a; 330 break; 331 case FORMAT_A32F: 332 *(float*)element = a; 333 break; 334 case FORMAT_R32F: 335 *(float*)element = r; 336 break; 337 case FORMAT_G32R32F: 338 ((float*)element)[0] = r; 339 ((float*)element)[1] = g; 340 break; 341 case FORMAT_X32B32G32R32F_UNSIGNED: 342 r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f); 343 // Fall through to FORMAT_X32B32G32R32F. 344 case FORMAT_X32B32G32R32F: 345 ((float*)element)[3] = 1.0f; 346 // Fall through to FORMAT_B32G32R32F. 
347 case FORMAT_B32G32R32F: 348 ((float*)element)[0] = r; 349 ((float*)element)[1] = g; 350 ((float*)element)[2] = b; 351 break; 352 case FORMAT_A32B32G32R32F: 353 ((float*)element)[0] = r; 354 ((float*)element)[1] = g; 355 ((float*)element)[2] = b; 356 ((float*)element)[3] = a; 357 break; 358 case FORMAT_D32F: 359 case FORMAT_D32FS8: 360 case FORMAT_D32F_LOCKABLE: 361 case FORMAT_D32FS8_TEXTURE: 362 case FORMAT_D32F_SHADOW: 363 case FORMAT_D32FS8_SHADOW: 364 *((float*)element) = r; 365 break; 366 case FORMAT_D32F_COMPLEMENTARY: 367 case FORMAT_D32FS8_COMPLEMENTARY: 368 *((float*)element) = 1 - r; 369 break; 370 case FORMAT_S8: 371 *((unsigned char*)element) = unorm<8>(r); 372 break; 373 case FORMAT_L8: 374 *(unsigned char*)element = unorm<8>(r); 375 break; 376 case FORMAT_A4L4: 377 *(unsigned char*)element = (unorm<4>(a) << 4) | (unorm<4>(r) << 0); 378 break; 379 case FORMAT_L16: 380 *(unsigned short*)element = unorm<16>(r); 381 break; 382 case FORMAT_A8L8: 383 *(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<8>(r) << 0); 384 break; 385 case FORMAT_L16F: 386 *(half*)element = (half)r; 387 break; 388 case FORMAT_A16L16F: 389 ((half*)element)[0] = (half)r; 390 ((half*)element)[1] = (half)a; 391 break; 392 case FORMAT_L32F: 393 *(float*)element = r; 394 break; 395 case FORMAT_A32L32F: 396 ((float*)element)[0] = r; 397 ((float*)element)[1] = a; 398 break; 399 default: 400 ASSERT(false); 401 } 402 } 403 read(int x,int y,int z) const404 Color<float> Surface::Buffer::read(int x, int y, int z) const 405 { 406 ASSERT((x >= -border) && (x < (width + border))); 407 ASSERT((y >= -border) && (y < (height + border))); 408 ASSERT((z >= 0) && (z < depth)); 409 410 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB; 411 412 return read(element); 413 } 414 read(int x,int y) const415 Color<float> Surface::Buffer::read(int x, int y) const 416 { 417 ASSERT((x >= -border) && (x < (width + border))); 418 ASSERT((y >= -border) && (y < (height + border))); 419 420 void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB; 421 422 return read(element); 423 } 424 read(void * element) const425 inline Color<float> Surface::Buffer::read(void *element) const 426 { 427 float r = 0.0f; 428 float g = 0.0f; 429 float b = 0.0f; 430 float a = 1.0f; 431 432 switch(format) 433 { 434 case FORMAT_P8: 435 { 436 ASSERT(palette); 437 438 unsigned int abgr = palette[*(unsigned char*)element]; 439 440 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 441 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 442 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 443 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 444 } 445 break; 446 case FORMAT_A8P8: 447 { 448 ASSERT(palette); 449 450 unsigned int bgr = palette[((unsigned char*)element)[0]]; 451 452 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF); 453 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00); 454 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000); 455 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 456 } 457 break; 458 case FORMAT_A8: 459 r = 0; 460 g = 0; 461 b = 0; 462 a = *(unsigned char*)element * (1.0f / 0xFF); 463 break; 464 case FORMAT_R8_SNORM: 465 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f); 466 break; 467 case FORMAT_R8: 468 r = *(unsigned char*)element * (1.0f / 0xFF); 469 break; 470 case FORMAT_R8I: 471 r = *(signed char*)element; 472 break; 473 case FORMAT_R8UI: 474 r = *(unsigned char*)element; 475 break; 476 case FORMAT_R3G3B2: 477 { 478 unsigned char rgb = *(unsigned char*)element; 
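			// Masking out each packed field and scaling by the reciprocal of its mask
			// expands the field to the [0, 1] range; the packed formats below follow the same pattern.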
479 480 r = (rgb & 0xE0) * (1.0f / 0xE0); 481 g = (rgb & 0x1C) * (1.0f / 0x1C); 482 b = (rgb & 0x03) * (1.0f / 0x03); 483 } 484 break; 485 case FORMAT_A8R3G3B2: 486 { 487 unsigned short argb = *(unsigned short*)element; 488 489 a = (argb & 0xFF00) * (1.0f / 0xFF00); 490 r = (argb & 0x00E0) * (1.0f / 0x00E0); 491 g = (argb & 0x001C) * (1.0f / 0x001C); 492 b = (argb & 0x0003) * (1.0f / 0x0003); 493 } 494 break; 495 case FORMAT_X4R4G4B4: 496 { 497 unsigned short rgb = *(unsigned short*)element; 498 499 r = (rgb & 0x0F00) * (1.0f / 0x0F00); 500 g = (rgb & 0x00F0) * (1.0f / 0x00F0); 501 b = (rgb & 0x000F) * (1.0f / 0x000F); 502 } 503 break; 504 case FORMAT_A4R4G4B4: 505 { 506 unsigned short argb = *(unsigned short*)element; 507 508 a = (argb & 0xF000) * (1.0f / 0xF000); 509 r = (argb & 0x0F00) * (1.0f / 0x0F00); 510 g = (argb & 0x00F0) * (1.0f / 0x00F0); 511 b = (argb & 0x000F) * (1.0f / 0x000F); 512 } 513 break; 514 case FORMAT_R4G4B4A4: 515 { 516 unsigned short rgba = *(unsigned short*)element; 517 518 r = (rgba & 0xF000) * (1.0f / 0xF000); 519 g = (rgba & 0x0F00) * (1.0f / 0x0F00); 520 b = (rgba & 0x00F0) * (1.0f / 0x00F0); 521 a = (rgba & 0x000F) * (1.0f / 0x000F); 522 } 523 break; 524 case FORMAT_R5G6B5: 525 { 526 unsigned short rgb = *(unsigned short*)element; 527 528 r = (rgb & 0xF800) * (1.0f / 0xF800); 529 g = (rgb & 0x07E0) * (1.0f / 0x07E0); 530 b = (rgb & 0x001F) * (1.0f / 0x001F); 531 } 532 break; 533 case FORMAT_A1R5G5B5: 534 { 535 unsigned short argb = *(unsigned short*)element; 536 537 a = (argb & 0x8000) * (1.0f / 0x8000); 538 r = (argb & 0x7C00) * (1.0f / 0x7C00); 539 g = (argb & 0x03E0) * (1.0f / 0x03E0); 540 b = (argb & 0x001F) * (1.0f / 0x001F); 541 } 542 break; 543 case FORMAT_R5G5B5A1: 544 { 545 unsigned short rgba = *(unsigned short*)element; 546 547 r = (rgba & 0xF800) * (1.0f / 0xF800); 548 g = (rgba & 0x07C0) * (1.0f / 0x07C0); 549 b = (rgba & 0x003E) * (1.0f / 0x003E); 550 a = (rgba & 0x0001) * (1.0f / 0x0001); 551 } 552 break; 553 case FORMAT_X1R5G5B5: 554 { 555 unsigned short xrgb = *(unsigned short*)element; 556 557 r = (xrgb & 0x7C00) * (1.0f / 0x7C00); 558 g = (xrgb & 0x03E0) * (1.0f / 0x03E0); 559 b = (xrgb & 0x001F) * (1.0f / 0x001F); 560 } 561 break; 562 case FORMAT_A8R8G8B8: 563 { 564 unsigned int argb = *(unsigned int*)element; 565 566 a = (argb & 0xFF000000) * (1.0f / 0xFF000000); 567 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000); 568 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00); 569 b = (argb & 0x000000FF) * (1.0f / 0x000000FF); 570 } 571 break; 572 case FORMAT_X8R8G8B8: 573 { 574 unsigned int xrgb = *(unsigned int*)element; 575 576 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000); 577 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00); 578 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF); 579 } 580 break; 581 case FORMAT_A8B8G8R8_SNORM: 582 { 583 signed char* abgr = (signed char*)element; 584 585 r = max(abgr[0] * (1.0f / 0x7F), -1.0f); 586 g = max(abgr[1] * (1.0f / 0x7F), -1.0f); 587 b = max(abgr[2] * (1.0f / 0x7F), -1.0f); 588 a = max(abgr[3] * (1.0f / 0x7F), -1.0f); 589 } 590 break; 591 case FORMAT_A8B8G8R8: 592 case FORMAT_SRGB8_A8: 593 { 594 unsigned int abgr = *(unsigned int*)element; 595 596 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 597 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 598 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 599 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 600 } 601 break; 602 case FORMAT_A8B8G8R8I: 603 { 604 signed char* abgr = (signed char*)element; 605 606 r = abgr[0]; 607 g = abgr[1]; 608 b = abgr[2]; 609 a = abgr[3]; 610 
} 611 break; 612 case FORMAT_A8B8G8R8UI: 613 { 614 unsigned char* abgr = (unsigned char*)element; 615 616 r = abgr[0]; 617 g = abgr[1]; 618 b = abgr[2]; 619 a = abgr[3]; 620 } 621 break; 622 case FORMAT_X8B8G8R8_SNORM: 623 { 624 signed char* bgr = (signed char*)element; 625 626 r = max(bgr[0] * (1.0f / 0x7F), -1.0f); 627 g = max(bgr[1] * (1.0f / 0x7F), -1.0f); 628 b = max(bgr[2] * (1.0f / 0x7F), -1.0f); 629 } 630 break; 631 case FORMAT_X8B8G8R8: 632 case FORMAT_SRGB8_X8: 633 { 634 unsigned int xbgr = *(unsigned int*)element; 635 636 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000); 637 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00); 638 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF); 639 } 640 break; 641 case FORMAT_X8B8G8R8I: 642 { 643 signed char* bgr = (signed char*)element; 644 645 r = bgr[0]; 646 g = bgr[1]; 647 b = bgr[2]; 648 } 649 break; 650 case FORMAT_X8B8G8R8UI: 651 { 652 unsigned char* bgr = (unsigned char*)element; 653 654 r = bgr[0]; 655 g = bgr[1]; 656 b = bgr[2]; 657 } 658 break; 659 case FORMAT_G8R8_SNORM: 660 { 661 signed char* gr = (signed char*)element; 662 663 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00); 664 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF); 665 } 666 break; 667 case FORMAT_G8R8: 668 { 669 unsigned short gr = *(unsigned short*)element; 670 671 g = (gr & 0xFF00) * (1.0f / 0xFF00); 672 r = (gr & 0x00FF) * (1.0f / 0x00FF); 673 } 674 break; 675 case FORMAT_G8R8I: 676 { 677 signed char* gr = (signed char*)element; 678 679 r = gr[0]; 680 g = gr[1]; 681 } 682 break; 683 case FORMAT_G8R8UI: 684 { 685 unsigned char* gr = (unsigned char*)element; 686 687 r = gr[0]; 688 g = gr[1]; 689 } 690 break; 691 case FORMAT_R16I: 692 r = *((short*)element); 693 break; 694 case FORMAT_R16UI: 695 r = *((unsigned short*)element); 696 break; 697 case FORMAT_G16R16I: 698 { 699 short* gr = (short*)element; 700 701 r = gr[0]; 702 g = gr[1]; 703 } 704 break; 705 case FORMAT_G16R16: 706 { 707 unsigned int gr = *(unsigned int*)element; 708 709 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000); 710 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF); 711 } 712 break; 713 case FORMAT_G16R16UI: 714 { 715 unsigned short* gr = (unsigned short*)element; 716 717 r = gr[0]; 718 g = gr[1]; 719 } 720 break; 721 case FORMAT_A2R10G10B10: 722 { 723 unsigned int argb = *(unsigned int*)element; 724 725 a = (argb & 0xC0000000) * (1.0f / 0xC0000000); 726 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000); 727 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00); 728 b = (argb & 0x000003FF) * (1.0f / 0x000003FF); 729 } 730 break; 731 case FORMAT_A2B10G10R10: 732 { 733 unsigned int abgr = *(unsigned int*)element; 734 735 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000); 736 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000); 737 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00); 738 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF); 739 } 740 break; 741 case FORMAT_A2B10G10R10UI: 742 { 743 unsigned int abgr = *(unsigned int*)element; 744 745 a = static_cast<float>((abgr & 0xC0000000) >> 30); 746 b = static_cast<float>((abgr & 0x3FF00000) >> 20); 747 g = static_cast<float>((abgr & 0x000FFC00) >> 10); 748 r = static_cast<float>(abgr & 0x000003FF); 749 } 750 break; 751 case FORMAT_A16B16G16R16I: 752 { 753 short* abgr = (short*)element; 754 755 r = abgr[0]; 756 g = abgr[1]; 757 b = abgr[2]; 758 a = abgr[3]; 759 } 760 break; 761 case FORMAT_A16B16G16R16: 762 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF); 763 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF); 764 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF); 765 a = ((unsigned short*)element)[3] * (1.0f / 
0xFFFF); 766 break; 767 case FORMAT_A16B16G16R16UI: 768 { 769 unsigned short* abgr = (unsigned short*)element; 770 771 r = abgr[0]; 772 g = abgr[1]; 773 b = abgr[2]; 774 a = abgr[3]; 775 } 776 break; 777 case FORMAT_X16B16G16R16I: 778 { 779 short* bgr = (short*)element; 780 781 r = bgr[0]; 782 g = bgr[1]; 783 b = bgr[2]; 784 } 785 break; 786 case FORMAT_X16B16G16R16UI: 787 { 788 unsigned short* bgr = (unsigned short*)element; 789 790 r = bgr[0]; 791 g = bgr[1]; 792 b = bgr[2]; 793 } 794 break; 795 case FORMAT_A32B32G32R32I: 796 { 797 int* abgr = (int*)element; 798 799 r = static_cast<float>(abgr[0]); 800 g = static_cast<float>(abgr[1]); 801 b = static_cast<float>(abgr[2]); 802 a = static_cast<float>(abgr[3]); 803 } 804 break; 805 case FORMAT_A32B32G32R32UI: 806 { 807 unsigned int* abgr = (unsigned int*)element; 808 809 r = static_cast<float>(abgr[0]); 810 g = static_cast<float>(abgr[1]); 811 b = static_cast<float>(abgr[2]); 812 a = static_cast<float>(abgr[3]); 813 } 814 break; 815 case FORMAT_X32B32G32R32I: 816 { 817 int* bgr = (int*)element; 818 819 r = static_cast<float>(bgr[0]); 820 g = static_cast<float>(bgr[1]); 821 b = static_cast<float>(bgr[2]); 822 } 823 break; 824 case FORMAT_X32B32G32R32UI: 825 { 826 unsigned int* bgr = (unsigned int*)element; 827 828 r = static_cast<float>(bgr[0]); 829 g = static_cast<float>(bgr[1]); 830 b = static_cast<float>(bgr[2]); 831 } 832 break; 833 case FORMAT_G32R32I: 834 { 835 int* gr = (int*)element; 836 837 r = static_cast<float>(gr[0]); 838 g = static_cast<float>(gr[1]); 839 } 840 break; 841 case FORMAT_G32R32UI: 842 { 843 unsigned int* gr = (unsigned int*)element; 844 845 r = static_cast<float>(gr[0]); 846 g = static_cast<float>(gr[1]); 847 } 848 break; 849 case FORMAT_R32I: 850 r = static_cast<float>(*((int*)element)); 851 break; 852 case FORMAT_R32UI: 853 r = static_cast<float>(*((unsigned int*)element)); 854 break; 855 case FORMAT_V8U8: 856 { 857 unsigned short vu = *(unsigned short*)element; 858 859 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000); 860 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000); 861 } 862 break; 863 case FORMAT_L6V5U5: 864 { 865 unsigned short lvu = *(unsigned short*)element; 866 867 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000); 868 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000); 869 b = (lvu & 0xFC00) * (1.0f / 0xFC00); 870 } 871 break; 872 case FORMAT_Q8W8V8U8: 873 { 874 unsigned int qwvu = *(unsigned int*)element; 875 876 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 877 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 878 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000); 879 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000); 880 } 881 break; 882 case FORMAT_X8L8V8U8: 883 { 884 unsigned int xlvu = *(unsigned int*)element; 885 886 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 887 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 888 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000); 889 } 890 break; 891 case FORMAT_R8G8B8: 892 r = ((unsigned char*)element)[2] * (1.0f / 0xFF); 893 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 894 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 895 break; 896 case FORMAT_B8G8R8: 897 r = ((unsigned char*)element)[0] * (1.0f / 0xFF); 898 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 899 b = ((unsigned char*)element)[2] * (1.0f / 0xFF); 900 break; 901 case FORMAT_V16U16: 902 { 903 unsigned int vu = *(unsigned int*)element; 904 905 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000); 906 g = 
((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000); 907 } 908 break; 909 case FORMAT_A2W10V10U10: 910 { 911 unsigned int awvu = *(unsigned int*)element; 912 913 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000); 914 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000); 915 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000); 916 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000); 917 } 918 break; 919 case FORMAT_A16W16V16U16: 920 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 921 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 922 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 923 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 924 break; 925 case FORMAT_Q16W16V16U16: 926 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 927 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 928 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 929 a = ((signed short*)element)[3] * (1.0f / 0x7FFF); 930 break; 931 case FORMAT_L8: 932 r = 933 g = 934 b = *(unsigned char*)element * (1.0f / 0xFF); 935 break; 936 case FORMAT_A4L4: 937 { 938 unsigned char al = *(unsigned char*)element; 939 940 r = 941 g = 942 b = (al & 0x0F) * (1.0f / 0x0F); 943 a = (al & 0xF0) * (1.0f / 0xF0); 944 } 945 break; 946 case FORMAT_L16: 947 r = 948 g = 949 b = *(unsigned short*)element * (1.0f / 0xFFFF); 950 break; 951 case FORMAT_A8L8: 952 r = 953 g = 954 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 955 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 956 break; 957 case FORMAT_L16F: 958 r = 959 g = 960 b = *(half*)element; 961 break; 962 case FORMAT_A16L16F: 963 r = 964 g = 965 b = ((half*)element)[0]; 966 a = ((half*)element)[1]; 967 break; 968 case FORMAT_L32F: 969 r = 970 g = 971 b = *(float*)element; 972 break; 973 case FORMAT_A32L32F: 974 r = 975 g = 976 b = ((float*)element)[0]; 977 a = ((float*)element)[1]; 978 break; 979 case FORMAT_A16F: 980 a = *(half*)element; 981 break; 982 case FORMAT_R16F: 983 r = *(half*)element; 984 break; 985 case FORMAT_G16R16F: 986 r = ((half*)element)[0]; 987 g = ((half*)element)[1]; 988 break; 989 case FORMAT_X16B16G16R16F: 990 case FORMAT_X16B16G16R16F_UNSIGNED: 991 case FORMAT_B16G16R16F: 992 r = ((half*)element)[0]; 993 g = ((half*)element)[1]; 994 b = ((half*)element)[2]; 995 break; 996 case FORMAT_A16B16G16R16F: 997 r = ((half*)element)[0]; 998 g = ((half*)element)[1]; 999 b = ((half*)element)[2]; 1000 a = ((half*)element)[3]; 1001 break; 1002 case FORMAT_A32F: 1003 a = *(float*)element; 1004 break; 1005 case FORMAT_R32F: 1006 r = *(float*)element; 1007 break; 1008 case FORMAT_G32R32F: 1009 r = ((float*)element)[0]; 1010 g = ((float*)element)[1]; 1011 break; 1012 case FORMAT_X32B32G32R32F: 1013 case FORMAT_X32B32G32R32F_UNSIGNED: 1014 case FORMAT_B32G32R32F: 1015 r = ((float*)element)[0]; 1016 g = ((float*)element)[1]; 1017 b = ((float*)element)[2]; 1018 break; 1019 case FORMAT_A32B32G32R32F: 1020 r = ((float*)element)[0]; 1021 g = ((float*)element)[1]; 1022 b = ((float*)element)[2]; 1023 a = ((float*)element)[3]; 1024 break; 1025 case FORMAT_D32F: 1026 case FORMAT_D32FS8: 1027 case FORMAT_D32F_LOCKABLE: 1028 case FORMAT_D32FS8_TEXTURE: 1029 case FORMAT_D32F_SHADOW: 1030 case FORMAT_D32FS8_SHADOW: 1031 r = *(float*)element; 1032 g = r; 1033 b = r; 1034 a = r; 1035 break; 1036 case FORMAT_D32F_COMPLEMENTARY: 1037 case FORMAT_D32FS8_COMPLEMENTARY: 1038 r = 1.0f - *(float*)element; 1039 g = r; 1040 b = r; 1041 a = r; 1042 break; 1043 case FORMAT_S8: 1044 r = *(unsigned char*)element * (1.0f / 0xFF); 1045 break; 1046 default: 1047 ASSERT(false); 1048 } 
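		// sRGB formats decode to nonlinear [0, 1] values above; convert them back to
		// linear space before returning the color.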
1049 1050 if(isSRGBformat(format)) 1051 { 1052 r = sRGBtoLinear(r); 1053 g = sRGBtoLinear(g); 1054 b = sRGBtoLinear(b); 1055 } 1056 1057 return Color<float>(r, g, b, a); 1058 } 1059 sample(float x,float y,float z) const1060 Color<float> Surface::Buffer::sample(float x, float y, float z) const 1061 { 1062 x -= 0.5f; 1063 y -= 0.5f; 1064 z -= 0.5f; 1065 1066 int x0 = clamp((int)x, 0, width - 1); 1067 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1; 1068 1069 int y0 = clamp((int)y, 0, height - 1); 1070 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1; 1071 1072 int z0 = clamp((int)z, 0, depth - 1); 1073 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1; 1074 1075 Color<float> c000 = read(x0, y0, z0); 1076 Color<float> c100 = read(x1, y0, z0); 1077 Color<float> c010 = read(x0, y1, z0); 1078 Color<float> c110 = read(x1, y1, z0); 1079 Color<float> c001 = read(x0, y0, z1); 1080 Color<float> c101 = read(x1, y0, z1); 1081 Color<float> c011 = read(x0, y1, z1); 1082 Color<float> c111 = read(x1, y1, z1); 1083 1084 float fx = x - x0; 1085 float fy = y - y0; 1086 float fz = z - z0; 1087 1088 c000 *= (1 - fx) * (1 - fy) * (1 - fz); 1089 c100 *= fx * (1 - fy) * (1 - fz); 1090 c010 *= (1 - fx) * fy * (1 - fz); 1091 c110 *= fx * fy * (1 - fz); 1092 c001 *= (1 - fx) * (1 - fy) * fz; 1093 c101 *= fx * (1 - fy) * fz; 1094 c011 *= (1 - fx) * fy * fz; 1095 c111 *= fx * fy * fz; 1096 1097 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111; 1098 } 1099 sample(float x,float y,int layer) const1100 Color<float> Surface::Buffer::sample(float x, float y, int layer) const 1101 { 1102 x -= 0.5f; 1103 y -= 0.5f; 1104 1105 int x0 = clamp((int)x, 0, width - 1); 1106 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1; 1107 1108 int y0 = clamp((int)y, 0, height - 1); 1109 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1; 1110 1111 Color<float> c00 = read(x0, y0, layer); 1112 Color<float> c10 = read(x1, y0, layer); 1113 Color<float> c01 = read(x0, y1, layer); 1114 Color<float> c11 = read(x1, y1, layer); 1115 1116 float fx = x - x0; 1117 float fy = y - y0; 1118 1119 c00 *= (1 - fx) * (1 - fy); 1120 c10 *= fx * (1 - fy); 1121 c01 *= (1 - fx) * fy; 1122 c11 *= fx * fy; 1123 1124 return c00 + c10 + c01 + c11; 1125 } 1126 lockRect(int x,int y,int z,Lock lock)1127 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock) 1128 { 1129 this->lock = lock; 1130 1131 switch(lock) 1132 { 1133 case LOCK_UNLOCKED: 1134 case LOCK_READONLY: 1135 case LOCK_UPDATE: 1136 break; 1137 case LOCK_WRITEONLY: 1138 case LOCK_READWRITE: 1139 case LOCK_DISCARD: 1140 dirty = true; 1141 break; 1142 default: 1143 ASSERT(false); 1144 } 1145 1146 if(buffer) 1147 { 1148 x += border; 1149 y += border; 1150 1151 switch(format) 1152 { 1153 case FORMAT_DXT1: 1154 case FORMAT_ATI1: 1155 case FORMAT_ETC1: 1156 case FORMAT_R11_EAC: 1157 case FORMAT_SIGNED_R11_EAC: 1158 case FORMAT_RGB8_ETC2: 1159 case FORMAT_SRGB8_ETC2: 1160 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1161 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1162 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1163 case FORMAT_RG11_EAC: 1164 case FORMAT_SIGNED_RG11_EAC: 1165 case FORMAT_RGBA8_ETC2_EAC: 1166 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 1167 case FORMAT_RGBA_ASTC_4x4_KHR: 1168 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1169 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1170 case FORMAT_RGBA_ASTC_5x4_KHR: 1171 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1172 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB; 1173 case FORMAT_RGBA_ASTC_5x5_KHR: 1174 case 
FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1175 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB; 1176 case FORMAT_RGBA_ASTC_6x5_KHR: 1177 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1178 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB; 1179 case FORMAT_RGBA_ASTC_6x6_KHR: 1180 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1181 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB; 1182 case FORMAT_RGBA_ASTC_8x5_KHR: 1183 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1184 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB; 1185 case FORMAT_RGBA_ASTC_8x6_KHR: 1186 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1187 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB; 1188 case FORMAT_RGBA_ASTC_8x8_KHR: 1189 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1190 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB; 1191 case FORMAT_RGBA_ASTC_10x5_KHR: 1192 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1193 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB; 1194 case FORMAT_RGBA_ASTC_10x6_KHR: 1195 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1196 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB; 1197 case FORMAT_RGBA_ASTC_10x8_KHR: 1198 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1199 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB; 1200 case FORMAT_RGBA_ASTC_10x10_KHR: 1201 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1202 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB; 1203 case FORMAT_RGBA_ASTC_12x10_KHR: 1204 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1205 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB; 1206 case FORMAT_RGBA_ASTC_12x12_KHR: 1207 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 1208 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB; 1209 case FORMAT_DXT3: 1210 case FORMAT_DXT5: 1211 case FORMAT_ATI2: 1212 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1213 default: 1214 return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB; 1215 } 1216 } 1217 1218 return nullptr; 1219 } 1220 unlockRect()1221 void Surface::Buffer::unlockRect() 1222 { 1223 lock = LOCK_UNLOCKED; 1224 } 1225 1226 class SurfaceImplementation : public Surface 1227 { 1228 public: SurfaceImplementation(int width,int height,int depth,Format format,void * pixels,int pitch,int slice)1229 SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) 1230 : Surface(width, height, depth, format, pixels, pitch, slice) {} SurfaceImplementation(Resource * texture,int width,int height,int depth,int border,int samples,Format format,bool lockable,bool renderTarget,int pitchP=0)1231 SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0) 1232 : Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {} ~SurfaceImplementation()1233 ~SurfaceImplementation() override {}; 1234 lockInternal(int x,int y,int z,Lock lock,Accessor client)1235 void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override 1236 { 1237 return Surface::lockInternal(x, y, z, lock, client); 1238 } 1239 unlockInternal()1240 void unlockInternal() override 1241 { 1242 Surface::unlockInternal(); 1243 } 1244 }; 1245 create(int width,int height,int 
depth,Format format,void * pixels,int pitch,int slice)1246 Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) 1247 { 1248 return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice); 1249 } 1250 create(Resource * texture,int width,int height,int depth,int border,int samples,Format format,bool lockable,bool renderTarget,int pitchPprovided)1251 Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) 1252 { 1253 return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided); 1254 } 1255 Surface(int width,int height,int depth,Format format,void * pixels,int pitch,int slice)1256 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false) 1257 { 1258 resource = new Resource(0); 1259 hasParent = false; 1260 ownExternal = false; 1261 depth = max(1, depth); 1262 1263 external.buffer = pixels; 1264 external.width = width; 1265 external.height = height; 1266 external.depth = depth; 1267 external.samples = 1; 1268 external.format = format; 1269 external.bytes = bytes(external.format); 1270 external.pitchB = pitch; 1271 external.pitchP = external.bytes ? pitch / external.bytes : 0; 1272 external.sliceB = slice; 1273 external.sliceP = external.bytes ? slice / external.bytes : 0; 1274 external.border = 0; 1275 external.lock = LOCK_UNLOCKED; 1276 external.dirty = true; 1277 1278 internal.buffer = nullptr; 1279 internal.width = width; 1280 internal.height = height; 1281 internal.depth = depth; 1282 internal.samples = 1; 1283 internal.format = selectInternalFormat(format); 1284 internal.bytes = bytes(internal.format); 1285 internal.pitchB = pitchB(internal.width, 0, internal.format, false); 1286 internal.pitchP = pitchP(internal.width, 0, internal.format, false); 1287 internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false); 1288 internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false); 1289 internal.border = 0; 1290 internal.lock = LOCK_UNLOCKED; 1291 internal.dirty = false; 1292 1293 stencil.buffer = nullptr; 1294 stencil.width = width; 1295 stencil.height = height; 1296 stencil.depth = depth; 1297 stencil.samples = 1; 1298 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL; 1299 stencil.bytes = bytes(stencil.format); 1300 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false); 1301 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false); 1302 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false); 1303 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false); 1304 stencil.border = 0; 1305 stencil.lock = LOCK_UNLOCKED; 1306 stencil.dirty = false; 1307 1308 dirtyContents = true; 1309 paletteUsed = 0; 1310 } 1311 Surface(Resource * texture,int width,int height,int depth,int border,int samples,Format format,bool lockable,bool renderTarget,int pitchPprovided)1312 Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget) 1313 { 1314 resource = texture ? 
texture : new Resource(0); 1315 hasParent = texture != nullptr; 1316 ownExternal = true; 1317 depth = max(1, depth); 1318 samples = max(1, samples); 1319 1320 external.buffer = nullptr; 1321 external.width = width; 1322 external.height = height; 1323 external.depth = depth; 1324 external.samples = (short)samples; 1325 external.format = format; 1326 external.bytes = bytes(external.format); 1327 external.pitchB = !pitchPprovided ? pitchB(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided * external.bytes; 1328 external.pitchP = !pitchPprovided ? pitchP(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided; 1329 external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture); 1330 external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture); 1331 external.border = 0; 1332 external.lock = LOCK_UNLOCKED; 1333 external.dirty = false; 1334 1335 internal.buffer = nullptr; 1336 internal.width = width; 1337 internal.height = height; 1338 internal.depth = depth; 1339 internal.samples = (short)samples; 1340 internal.format = selectInternalFormat(format); 1341 internal.bytes = bytes(internal.format); 1342 internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes; 1343 internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided; 1344 internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget); 1345 internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget); 1346 internal.border = (short)border; 1347 internal.lock = LOCK_UNLOCKED; 1348 internal.dirty = false; 1349 1350 stencil.buffer = nullptr; 1351 stencil.width = width; 1352 stencil.height = height; 1353 stencil.depth = depth; 1354 stencil.samples = (short)samples; 1355 stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL; 1356 stencil.bytes = bytes(stencil.format); 1357 stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget); 1358 stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget); 1359 stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget); 1360 stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget); 1361 stencil.border = 0; 1362 stencil.lock = LOCK_UNLOCKED; 1363 stencil.dirty = false; 1364 1365 dirtyContents = true; 1366 paletteUsed = 0; 1367 } 1368 ~Surface()1369 Surface::~Surface() 1370 { 1371 // sync() must be called before this destructor to ensure all locks have been released. 1372 // We can't call it here because the parent resource may already have been destroyed. 
1373 ASSERT(isUnlocked()); 1374 1375 if(!hasParent) 1376 { 1377 resource->destruct(); 1378 } 1379 1380 if(ownExternal) 1381 { 1382 deallocate(external.buffer); 1383 } 1384 1385 if(internal.buffer != external.buffer) 1386 { 1387 deallocate(internal.buffer); 1388 } 1389 1390 deallocate(stencil.buffer); 1391 1392 external.buffer = nullptr; 1393 internal.buffer = nullptr; 1394 stencil.buffer = nullptr; 1395 } 1396 lockExternal(int x,int y,int z,Lock lock,Accessor client)1397 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client) 1398 { 1399 resource->lock(client); 1400 1401 if(!external.buffer) 1402 { 1403 if(internal.buffer && identicalBuffers()) 1404 { 1405 external.buffer = internal.buffer; 1406 } 1407 else 1408 { 1409 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format); 1410 } 1411 } 1412 1413 if(internal.dirty) 1414 { 1415 if(lock != LOCK_DISCARD) 1416 { 1417 update(external, internal); 1418 } 1419 1420 internal.dirty = false; 1421 } 1422 1423 switch(lock) 1424 { 1425 case LOCK_READONLY: 1426 break; 1427 case LOCK_WRITEONLY: 1428 case LOCK_READWRITE: 1429 case LOCK_DISCARD: 1430 dirtyContents = true; 1431 break; 1432 default: 1433 ASSERT(false); 1434 } 1435 1436 return external.lockRect(x, y, z, lock); 1437 } 1438 unlockExternal()1439 void Surface::unlockExternal() 1440 { 1441 external.unlockRect(); 1442 1443 resource->unlock(); 1444 } 1445 lockInternal(int x,int y,int z,Lock lock,Accessor client)1446 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client) 1447 { 1448 if(lock != LOCK_UNLOCKED) 1449 { 1450 resource->lock(client); 1451 } 1452 1453 if(!internal.buffer) 1454 { 1455 if(external.buffer && identicalBuffers()) 1456 { 1457 internal.buffer = external.buffer; 1458 } 1459 else 1460 { 1461 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format); 1462 } 1463 } 1464 1465 // FIXME: WHQL requires conversion to lower external precision and back 1466 if(logPrecision >= WHQL) 1467 { 1468 if(internal.dirty && renderTarget && internal.format != external.format) 1469 { 1470 if(lock != LOCK_DISCARD) 1471 { 1472 switch(external.format) 1473 { 1474 case FORMAT_R3G3B2: 1475 case FORMAT_A8R3G3B2: 1476 case FORMAT_A1R5G5B5: 1477 case FORMAT_A2R10G10B10: 1478 case FORMAT_A2B10G10R10: 1479 lockExternal(0, 0, 0, LOCK_READWRITE, client); 1480 unlockExternal(); 1481 break; 1482 default: 1483 // Difference passes WHQL 1484 break; 1485 } 1486 } 1487 } 1488 } 1489 1490 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID)) 1491 { 1492 if(lock != LOCK_DISCARD) 1493 { 1494 update(internal, external); 1495 } 1496 1497 external.dirty = false; 1498 paletteUsed = Surface::paletteID; 1499 } 1500 1501 switch(lock) 1502 { 1503 case LOCK_UNLOCKED: 1504 case LOCK_READONLY: 1505 break; 1506 case LOCK_WRITEONLY: 1507 case LOCK_READWRITE: 1508 case LOCK_DISCARD: 1509 dirtyContents = true; 1510 break; 1511 default: 1512 ASSERT(false); 1513 } 1514 1515 if(lock == LOCK_READONLY && client == PUBLIC) 1516 { 1517 resolve(); 1518 } 1519 1520 return internal.lockRect(x, y, z, lock); 1521 } 1522 unlockInternal()1523 void Surface::unlockInternal() 1524 { 1525 internal.unlockRect(); 1526 1527 resource->unlock(); 1528 } 1529 lockStencil(int x,int y,int front,Accessor client)1530 void *Surface::lockStencil(int x, int y, int front, Accessor client) 1531 { 1532 resource->lock(client); 1533 1534 
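		// Only formats with a stencil aspect have a separate S8 stencil plane;
		// for all other formats there is nothing to lock.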
if(stencil.format == FORMAT_NULL) 1535 { 1536 return nullptr; 1537 } 1538 1539 if(!stencil.buffer) 1540 { 1541 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format); 1542 } 1543 1544 return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME 1545 } 1546 unlockStencil()1547 void Surface::unlockStencil() 1548 { 1549 stencil.unlockRect(); 1550 1551 resource->unlock(); 1552 } 1553 bytes(Format format)1554 int Surface::bytes(Format format) 1555 { 1556 switch(format) 1557 { 1558 case FORMAT_NULL: return 0; 1559 case FORMAT_P8: return 1; 1560 case FORMAT_A8P8: return 2; 1561 case FORMAT_A8: return 1; 1562 case FORMAT_R8I: return 1; 1563 case FORMAT_R8: return 1; 1564 case FORMAT_R3G3B2: return 1; 1565 case FORMAT_R16I: return 2; 1566 case FORMAT_R16UI: return 2; 1567 case FORMAT_A8R3G3B2: return 2; 1568 case FORMAT_R5G6B5: return 2; 1569 case FORMAT_A1R5G5B5: return 2; 1570 case FORMAT_X1R5G5B5: return 2; 1571 case FORMAT_R5G5B5A1: return 2; 1572 case FORMAT_X4R4G4B4: return 2; 1573 case FORMAT_A4R4G4B4: return 2; 1574 case FORMAT_R4G4B4A4: return 2; 1575 case FORMAT_R8G8B8: return 3; 1576 case FORMAT_B8G8R8: return 3; 1577 case FORMAT_R32I: return 4; 1578 case FORMAT_R32UI: return 4; 1579 case FORMAT_X8R8G8B8: return 4; 1580 // case FORMAT_X8G8R8B8Q: return 4; 1581 case FORMAT_A8R8G8B8: return 4; 1582 // case FORMAT_A8G8R8B8Q: return 4; 1583 case FORMAT_X8B8G8R8I: return 4; 1584 case FORMAT_X8B8G8R8: return 4; 1585 case FORMAT_SRGB8_X8: return 4; 1586 case FORMAT_SRGB8_A8: return 4; 1587 case FORMAT_A8B8G8R8I: return 4; 1588 case FORMAT_R8UI: return 1; 1589 case FORMAT_G8R8UI: return 2; 1590 case FORMAT_X8B8G8R8UI: return 4; 1591 case FORMAT_A8B8G8R8UI: return 4; 1592 case FORMAT_A8B8G8R8: return 4; 1593 case FORMAT_R8_SNORM: return 1; 1594 case FORMAT_G8R8_SNORM: return 2; 1595 case FORMAT_X8B8G8R8_SNORM: return 4; 1596 case FORMAT_A8B8G8R8_SNORM: return 4; 1597 case FORMAT_A2R10G10B10: return 4; 1598 case FORMAT_A2B10G10R10: return 4; 1599 case FORMAT_A2B10G10R10UI: return 4; 1600 case FORMAT_G8R8I: return 2; 1601 case FORMAT_G8R8: return 2; 1602 case FORMAT_G16R16I: return 4; 1603 case FORMAT_G16R16UI: return 4; 1604 case FORMAT_G16R16: return 4; 1605 case FORMAT_G32R32I: return 8; 1606 case FORMAT_G32R32UI: return 8; 1607 case FORMAT_X16B16G16R16I: return 8; 1608 case FORMAT_X16B16G16R16UI: return 8; 1609 case FORMAT_A16B16G16R16I: return 8; 1610 case FORMAT_A16B16G16R16UI: return 8; 1611 case FORMAT_A16B16G16R16: return 8; 1612 case FORMAT_X32B32G32R32I: return 16; 1613 case FORMAT_X32B32G32R32UI: return 16; 1614 case FORMAT_A32B32G32R32I: return 16; 1615 case FORMAT_A32B32G32R32UI: return 16; 1616 // Compressed formats 1617 case FORMAT_DXT1: return 2; // Column of four pixels 1618 case FORMAT_DXT3: return 4; // Column of four pixels 1619 case FORMAT_DXT5: return 4; // Column of four pixels 1620 case FORMAT_ATI1: return 2; // Column of four pixels 1621 case FORMAT_ATI2: return 4; // Column of four pixels 1622 case FORMAT_ETC1: return 2; // Column of four pixels 1623 case FORMAT_R11_EAC: return 2; 1624 case FORMAT_SIGNED_R11_EAC: return 2; 1625 case FORMAT_RG11_EAC: return 4; 1626 case FORMAT_SIGNED_RG11_EAC: return 4; 1627 case FORMAT_RGB8_ETC2: return 2; 1628 case FORMAT_SRGB8_ETC2: return 2; 1629 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2; 1630 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2; 1631 case FORMAT_RGBA8_ETC2_EAC: return 4; 1632 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4; 1633 case 
FORMAT_RGBA_ASTC_4x4_KHR: 1634 case FORMAT_RGBA_ASTC_5x4_KHR: 1635 case FORMAT_RGBA_ASTC_5x5_KHR: 1636 case FORMAT_RGBA_ASTC_6x5_KHR: 1637 case FORMAT_RGBA_ASTC_6x6_KHR: 1638 case FORMAT_RGBA_ASTC_8x5_KHR: 1639 case FORMAT_RGBA_ASTC_8x6_KHR: 1640 case FORMAT_RGBA_ASTC_8x8_KHR: 1641 case FORMAT_RGBA_ASTC_10x5_KHR: 1642 case FORMAT_RGBA_ASTC_10x6_KHR: 1643 case FORMAT_RGBA_ASTC_10x8_KHR: 1644 case FORMAT_RGBA_ASTC_10x10_KHR: 1645 case FORMAT_RGBA_ASTC_12x10_KHR: 1646 case FORMAT_RGBA_ASTC_12x12_KHR: 1647 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1648 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1649 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1650 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1651 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1652 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1653 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1654 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1655 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1656 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1657 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1658 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1659 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1660 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME 1661 // Bumpmap formats 1662 case FORMAT_V8U8: return 2; 1663 case FORMAT_L6V5U5: return 2; 1664 case FORMAT_Q8W8V8U8: return 4; 1665 case FORMAT_X8L8V8U8: return 4; 1666 case FORMAT_A2W10V10U10: return 4; 1667 case FORMAT_V16U16: return 4; 1668 case FORMAT_A16W16V16U16: return 8; 1669 case FORMAT_Q16W16V16U16: return 8; 1670 // Luminance formats 1671 case FORMAT_L8: return 1; 1672 case FORMAT_A4L4: return 1; 1673 case FORMAT_L16: return 2; 1674 case FORMAT_A8L8: return 2; 1675 case FORMAT_L16F: return 2; 1676 case FORMAT_A16L16F: return 4; 1677 case FORMAT_L32F: return 4; 1678 case FORMAT_A32L32F: return 8; 1679 // Floating-point formats 1680 case FORMAT_A16F: return 2; 1681 case FORMAT_R16F: return 2; 1682 case FORMAT_G16R16F: return 4; 1683 case FORMAT_B16G16R16F: return 6; 1684 case FORMAT_X16B16G16R16F: return 8; 1685 case FORMAT_A16B16G16R16F: return 8; 1686 case FORMAT_X16B16G16R16F_UNSIGNED: return 8; 1687 case FORMAT_A32F: return 4; 1688 case FORMAT_R32F: return 4; 1689 case FORMAT_G32R32F: return 8; 1690 case FORMAT_B32G32R32F: return 12; 1691 case FORMAT_X32B32G32R32F: return 16; 1692 case FORMAT_A32B32G32R32F: return 16; 1693 case FORMAT_X32B32G32R32F_UNSIGNED: return 16; 1694 // Depth/stencil formats 1695 case FORMAT_D16: return 2; 1696 case FORMAT_D32: return 4; 1697 case FORMAT_D24X8: return 4; 1698 case FORMAT_D24S8: return 4; 1699 case FORMAT_D24FS8: return 4; 1700 case FORMAT_D32F: return 4; 1701 case FORMAT_D32FS8: return 4; 1702 case FORMAT_D32F_COMPLEMENTARY: return 4; 1703 case FORMAT_D32FS8_COMPLEMENTARY: return 4; 1704 case FORMAT_D32F_LOCKABLE: return 4; 1705 case FORMAT_D32FS8_TEXTURE: return 4; 1706 case FORMAT_D32F_SHADOW: return 4; 1707 case FORMAT_D32FS8_SHADOW: return 4; 1708 case FORMAT_DF24S8: return 4; 1709 case FORMAT_DF16S8: return 2; 1710 case FORMAT_INTZ: return 4; 1711 case FORMAT_S8: return 1; 1712 case FORMAT_YV12_BT601: return 1; // Y plane only 1713 case FORMAT_YV12_BT709: return 1; // Y plane only 1714 case FORMAT_YV12_JFIF: return 1; // Y plane only 1715 default: 1716 ASSERT(false); 1717 } 1718 1719 return 0; 1720 } 1721 pitchB(int width,int border,Format format,bool target)1722 int Surface::pitchB(int width, int border, Format format, bool target) 1723 { 1724 width += 2 * border; 1725 1726 // Render targets require 2x2 quads 1727 if(target || isDepth(format) || isStencil(format)) 1728 { 1729 width = align<2>(width); 
		}

		// For block-compressed formats the pitch below is the number of bytes per row of blocks;
		// e.g. a 16-texel-wide DXT1 image needs (16 + 3) / 4 = 4 blocks of 8 bytes, i.e. 32 bytes
		// per row of 4x4 blocks.
		switch(format)
		{
		case FORMAT_DXT1:
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			return 8 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per 4 rows
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
			return 16 * ((width + 4) / 5);
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
			return 16 * ((width + 5) / 6);
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
			return 16 * ((width + 7) / 8);
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
			return 16 * ((width + 9) / 10);
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
			return 16 * ((width + 11) / 12);
		case FORMAT_DXT3:
		case FORMAT_DXT5:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_ATI1:
			return 2 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per row
		case FORMAT_ATI2:
			return 4 * ((width + 3) / 4);    // 128 bit per 4x4 block, computed per row
		case FORMAT_YV12_BT601:
		case FORMAT_YV12_BT709:
		case FORMAT_YV12_JFIF:
			return align<16>(width);
		default:
			return bytes(format) * width;
		}
	}

	int Surface::pitchP(int width, int border, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ? pitchB(width, border, format, target) / B : 0;
	}

	int Surface::sliceB(int width, int height, int border, Format format, bool target)
	{
		height += 2 * border;

		// Render targets require 2x2 quads
		if(target || isDepth(format) || isStencil(format))
		{
			height = align<2>(height);
		}

		switch(format)
		{
		case FORMAT_DXT1:
		case FORMAT_DXT3:
		case FORMAT_DXT5:
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
			return pitchB(width, border, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
			return pitchB(width, border, format, target) * ((height + 4) / 5);   // Pitch computed per 5 rows
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
			return pitchB(width, border, format, target) * ((height + 5) / 6);   // Pitch computed per 6 rows
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
			return pitchB(width, border, format, target) * ((height + 7) / 8);   // Pitch computed per 8 rows
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
			return pitchB(width, border, format, target) * ((height + 9) / 10);  // Pitch computed per 10 rows
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
			return pitchB(width, border, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
		case FORMAT_ATI1:
		case FORMAT_ATI2:
			return pitchB(width, border, format, target) * align<4>(height);     // Pitch computed per row
		default:
			return pitchB(width, border, format, target) * height;               // Pitch computed per row
		}
	}

	int Surface::sliceP(int width, int height, int border, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ?
sliceB(width, height, border, format, target) / B : 0; 1877 } 1878 update(Buffer & destination,Buffer & source)1879 void Surface::update(Buffer &destination, Buffer &source) 1880 { 1881 // ASSERT(source.lock != LOCK_UNLOCKED); 1882 // ASSERT(destination.lock != LOCK_UNLOCKED); 1883 1884 if(destination.buffer != source.buffer) 1885 { 1886 ASSERT(source.dirty && !destination.dirty); 1887 1888 switch(source.format) 1889 { 1890 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format 1891 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format 1892 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format 1893 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format 1894 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format 1895 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format 1896 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format 1897 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format 1898 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format 1899 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format 1900 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format 1901 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format 1902 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format 1903 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format 1904 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format 1905 case FORMAT_ETC1: 1906 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format 1907 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format 1908 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format 1909 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format 1910 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format 1911 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format 1912 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format 1913 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format 1914 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format 1915 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format 1916 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format 1917 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format 1918 case FORMAT_RGBA_ASTC_8x6_KHR: 
decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format 1919 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format 1920 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format 1921 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format 1922 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format 1923 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format 1924 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format 1925 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format 1926 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format 1927 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format 1928 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format 1929 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format 1930 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format 1931 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format 1932 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format 1933 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format 1934 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format 1935 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format 1936 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format 1937 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format 1938 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format 1939 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format 1940 default: genericUpdate(destination, source); break; 1941 } 1942 } 1943 } 1944 genericUpdate(Buffer & destination,Buffer & source)1945 void Surface::genericUpdate(Buffer &destination, Buffer &source) 1946 { 1947 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1948 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1949 1950 int depth = min(destination.depth, source.depth); 1951 int height = min(destination.height, source.height); 1952 int width = min(destination.width, source.width); 1953 int rowBytes = width * source.bytes; 1954 1955 for(int z = 0; z < depth; 
z++) 1956 { 1957 unsigned char *sourceRow = sourceSlice; 1958 unsigned char *destinationRow = destinationSlice; 1959 1960 for(int y = 0; y < height; y++) 1961 { 1962 if(source.format == destination.format) 1963 { 1964 memcpy(destinationRow, sourceRow, rowBytes); 1965 } 1966 else 1967 { 1968 unsigned char *sourceElement = sourceRow; 1969 unsigned char *destinationElement = destinationRow; 1970 1971 for(int x = 0; x < width; x++) 1972 { 1973 Color<float> color = source.read(sourceElement); 1974 destination.write(destinationElement, color); 1975 1976 sourceElement += source.bytes; 1977 destinationElement += destination.bytes; 1978 } 1979 } 1980 1981 sourceRow += source.pitchB; 1982 destinationRow += destination.pitchB; 1983 } 1984 1985 sourceSlice += source.sliceB; 1986 destinationSlice += destination.sliceB; 1987 } 1988 1989 source.unlockRect(); 1990 destination.unlockRect(); 1991 } 1992 decodeR8G8B8(Buffer & destination,Buffer & source)1993 void Surface::decodeR8G8B8(Buffer &destination, Buffer &source) 1994 { 1995 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1996 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1997 1998 int depth = min(destination.depth, source.depth); 1999 int height = min(destination.height, source.height); 2000 int width = min(destination.width, source.width); 2001 2002 for(int z = 0; z < depth; z++) 2003 { 2004 unsigned char *sourceRow = sourceSlice; 2005 unsigned char *destinationRow = destinationSlice; 2006 2007 for(int y = 0; y < height; y++) 2008 { 2009 unsigned char *sourceElement = sourceRow; 2010 unsigned char *destinationElement = destinationRow; 2011 2012 for(int x = 0; x < width; x++) 2013 { 2014 unsigned int b = sourceElement[0]; 2015 unsigned int g = sourceElement[1]; 2016 unsigned int r = sourceElement[2]; 2017 2018 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0); 2019 2020 sourceElement += source.bytes; 2021 destinationElement += destination.bytes; 2022 } 2023 2024 sourceRow += source.pitchB; 2025 destinationRow += destination.pitchB; 2026 } 2027 2028 sourceSlice += source.sliceB; 2029 destinationSlice += destination.sliceB; 2030 } 2031 2032 source.unlockRect(); 2033 destination.unlockRect(); 2034 } 2035 decodeX1R5G5B5(Buffer & destination,Buffer & source)2036 void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source) 2037 { 2038 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2039 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2040 2041 int depth = min(destination.depth, source.depth); 2042 int height = min(destination.height, source.height); 2043 int width = min(destination.width, source.width); 2044 2045 for(int z = 0; z < depth; z++) 2046 { 2047 unsigned char *sourceRow = sourceSlice; 2048 unsigned char *destinationRow = destinationSlice; 2049 2050 for(int y = 0; y < height; y++) 2051 { 2052 unsigned char *sourceElement = sourceRow; 2053 unsigned char *destinationElement = destinationRow; 2054 2055 for(int x = 0; x < width; x++) 2056 { 2057 unsigned int xrgb = *(unsigned short*)sourceElement; 2058 2059 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 2060 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 2061 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8); 2062 2063 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 2064 2065 sourceElement += source.bytes; 
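						// The multipliers used above are fixed-point approximations of 255 / 31 that expand
						// each 5-bit channel straight into its 8-bit position: 134771 ~= 255 * 2^14 / 31 (red,
						// bits 16-23), 16846 ~= 255 * 2^11 / 31 (green, bits 8-15) and 2106 ~= 255 * 256 / 31
						// (blue), with the added constants rounding to nearest before the >> 8. decodeA1R5G5B5
						// below uses the same trick, plus (argb & 0x8000) * 130560 == 0xFF000000 for the 1-bit alpha.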
2066 destinationElement += destination.bytes; 2067 } 2068 2069 sourceRow += source.pitchB; 2070 destinationRow += destination.pitchB; 2071 } 2072 2073 sourceSlice += source.sliceB; 2074 destinationSlice += destination.sliceB; 2075 } 2076 2077 source.unlockRect(); 2078 destination.unlockRect(); 2079 } 2080 decodeA1R5G5B5(Buffer & destination,Buffer & source)2081 void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source) 2082 { 2083 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2084 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2085 2086 int depth = min(destination.depth, source.depth); 2087 int height = min(destination.height, source.height); 2088 int width = min(destination.width, source.width); 2089 2090 for(int z = 0; z < depth; z++) 2091 { 2092 unsigned char *sourceRow = sourceSlice; 2093 unsigned char *destinationRow = destinationSlice; 2094 2095 for(int y = 0; y < height; y++) 2096 { 2097 unsigned char *sourceElement = sourceRow; 2098 unsigned char *destinationElement = destinationRow; 2099 2100 for(int x = 0; x < width; x++) 2101 { 2102 unsigned int argb = *(unsigned short*)sourceElement; 2103 2104 unsigned int a = (argb & 0x8000) * 130560; 2105 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 2106 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 2107 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8); 2108 2109 *(unsigned int*)destinationElement = a | r | g | b; 2110 2111 sourceElement += source.bytes; 2112 destinationElement += destination.bytes; 2113 } 2114 2115 sourceRow += source.pitchB; 2116 destinationRow += destination.pitchB; 2117 } 2118 2119 sourceSlice += source.sliceB; 2120 destinationSlice += destination.sliceB; 2121 } 2122 2123 source.unlockRect(); 2124 destination.unlockRect(); 2125 } 2126 decodeX4R4G4B4(Buffer & destination,Buffer & source)2127 void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source) 2128 { 2129 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2130 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2131 2132 int depth = min(destination.depth, source.depth); 2133 int height = min(destination.height, source.height); 2134 int width = min(destination.width, source.width); 2135 2136 for(int z = 0; z < depth; z++) 2137 { 2138 unsigned char *sourceRow = sourceSlice; 2139 unsigned char *destinationRow = destinationSlice; 2140 2141 for(int y = 0; y < height; y++) 2142 { 2143 unsigned char *sourceElement = sourceRow; 2144 unsigned char *destinationElement = destinationRow; 2145 2146 for(int x = 0; x < width; x++) 2147 { 2148 unsigned int xrgb = *(unsigned short*)sourceElement; 2149 2150 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000; 2151 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00; 2152 unsigned int b = (xrgb & 0x000F) * 0x00000011; 2153 2154 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 2155 2156 sourceElement += source.bytes; 2157 destinationElement += destination.bytes; 2158 } 2159 2160 sourceRow += source.pitchB; 2161 destinationRow += destination.pitchB; 2162 } 2163 2164 sourceSlice += source.sliceB; 2165 destinationSlice += destination.sliceB; 2166 } 2167 2168 source.unlockRect(); 2169 destination.unlockRect(); 2170 } 2171 decodeA4R4G4B4(Buffer & destination,Buffer & source)2172 void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source) 2173 { 2174 
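		// As in decodeX4R4G4B4 above, each 4-bit channel is expanded to 8 bits by nibble
		// replication: n * 0x11 == n * 17 == n * 255 / 15, done with one multiply and mask
		// per channel directly in its destination bit position.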
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2175 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2176 2177 int depth = min(destination.depth, source.depth); 2178 int height = min(destination.height, source.height); 2179 int width = min(destination.width, source.width); 2180 2181 for(int z = 0; z < depth; z++) 2182 { 2183 unsigned char *sourceRow = sourceSlice; 2184 unsigned char *destinationRow = destinationSlice; 2185 2186 for(int y = 0; y < height; y++) 2187 { 2188 unsigned char *sourceElement = sourceRow; 2189 unsigned char *destinationElement = destinationRow; 2190 2191 for(int x = 0; x < width; x++) 2192 { 2193 unsigned int argb = *(unsigned short*)sourceElement; 2194 2195 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000; 2196 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000; 2197 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00; 2198 unsigned int b = (argb & 0x000F) * 0x00000011; 2199 2200 *(unsigned int*)destinationElement = a | r | g | b; 2201 2202 sourceElement += source.bytes; 2203 destinationElement += destination.bytes; 2204 } 2205 2206 sourceRow += source.pitchB; 2207 destinationRow += destination.pitchB; 2208 } 2209 2210 sourceSlice += source.sliceB; 2211 destinationSlice += destination.sliceB; 2212 } 2213 2214 source.unlockRect(); 2215 destination.unlockRect(); 2216 } 2217 decodeP8(Buffer & destination,Buffer & source)2218 void Surface::decodeP8(Buffer &destination, Buffer &source) 2219 { 2220 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2221 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2222 2223 int depth = min(destination.depth, source.depth); 2224 int height = min(destination.height, source.height); 2225 int width = min(destination.width, source.width); 2226 2227 for(int z = 0; z < depth; z++) 2228 { 2229 unsigned char *sourceRow = sourceSlice; 2230 unsigned char *destinationRow = destinationSlice; 2231 2232 for(int y = 0; y < height; y++) 2233 { 2234 unsigned char *sourceElement = sourceRow; 2235 unsigned char *destinationElement = destinationRow; 2236 2237 for(int x = 0; x < width; x++) 2238 { 2239 unsigned int abgr = palette[*(unsigned char*)sourceElement]; 2240 2241 unsigned int r = (abgr & 0x000000FF) << 16; 2242 unsigned int g = (abgr & 0x0000FF00) << 0; 2243 unsigned int b = (abgr & 0x00FF0000) >> 16; 2244 unsigned int a = (abgr & 0xFF000000) >> 0; 2245 2246 *(unsigned int*)destinationElement = a | r | g | b; 2247 2248 sourceElement += source.bytes; 2249 destinationElement += destination.bytes; 2250 } 2251 2252 sourceRow += source.pitchB; 2253 destinationRow += destination.pitchB; 2254 } 2255 2256 sourceSlice += source.sliceB; 2257 destinationSlice += destination.sliceB; 2258 } 2259 2260 source.unlockRect(); 2261 destination.unlockRect(); 2262 } 2263 decodeDXT1(Buffer & internal,Buffer & external)2264 void Surface::decodeDXT1(Buffer &internal, Buffer &external) 2265 { 2266 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2267 const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY); 2268 2269 for(int z = 0; z < external.depth; z++) 2270 { 2271 unsigned int *dest = destSlice; 2272 2273 for(int y = 0; y < external.height; y += 4) 2274 { 2275 for(int x = 0; x < external.width; x += 4) 2276 { 2277 Color<byte> c[4]; 2278 2279 c[0] = source->c0; 2280 c[1] = source->c1; 2281 2282 if(source->c0 > 
source->c1) // No transparency 2283 { 2284 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2285 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2286 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2287 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2288 c[2].a = 0xFF; 2289 2290 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2291 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2292 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2293 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2294 c[3].a = 0xFF; 2295 } 2296 else // c3 transparent 2297 { 2298 // c2 = 1 / 2 * c0 + 1 / 2 * c1 2299 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2); 2300 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2); 2301 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2); 2302 c[2].a = 0xFF; 2303 2304 c[3].r = 0; 2305 c[3].g = 0; 2306 c[3].b = 0; 2307 c[3].a = 0; 2308 } 2309 2310 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2311 { 2312 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2313 { 2314 dest[(x + i) + (y + j) * internal.pitchP] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4]; 2315 } 2316 } 2317 2318 source++; 2319 } 2320 } 2321 2322 (byte*&)destSlice += internal.sliceB; 2323 } 2324 2325 external.unlockRect(); 2326 internal.unlockRect(); 2327 } 2328 decodeDXT3(Buffer & internal,Buffer & external)2329 void Surface::decodeDXT3(Buffer &internal, Buffer &external) 2330 { 2331 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2332 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY); 2333 2334 for(int z = 0; z < external.depth; z++) 2335 { 2336 unsigned int *dest = destSlice; 2337 2338 for(int y = 0; y < external.height; y += 4) 2339 { 2340 for(int x = 0; x < external.width; x += 4) 2341 { 2342 Color<byte> c[4]; 2343 2344 c[0] = source->c0; 2345 c[1] = source->c1; 2346 2347 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2348 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2349 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2350 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2351 2352 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2353 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2354 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2355 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2356 2357 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2358 { 2359 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2360 { 2361 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F; 2362 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24)); 2363 2364 dest[(x + i) + (y + j) * internal.pitchP] = color; 2365 } 2366 } 2367 2368 source++; 2369 } 2370 } 2371 2372 (byte*&)destSlice += internal.sliceB; 2373 } 2374 2375 external.unlockRect(); 2376 internal.unlockRect(); 2377 } 2378 decodeDXT5(Buffer & internal,Buffer & external)2379 void Surface::decodeDXT5(Buffer &internal, Buffer &external) 2380 { 2381 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2382 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY); 2383 2384 for(int z = 0; z < external.depth; z++) 2385 { 2386 unsigned int *dest = destSlice; 2387 2388 for(int y = 0; y < external.height; y += 4) 2389 { 2390 for(int x = 0; x < external.width; x += 4) 2391 { 2392 Color<byte> c[4]; 2393 2394 c[0] = source->c0; 2395 c[1] = source->c1; 2396 2397 // c2 = 2 / 3 * c0 + 1 / 3 * 
c1 2398 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2399 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2400 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2401 2402 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2403 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2404 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2405 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2406 2407 byte a[8]; 2408 2409 a[0] = source->a0; 2410 a[1] = source->a1; 2411 2412 if(a[0] > a[1]) 2413 { 2414 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7); 2415 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7); 2416 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7); 2417 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7); 2418 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7); 2419 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7); 2420 } 2421 else 2422 { 2423 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5); 2424 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5); 2425 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5); 2426 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5); 2427 a[6] = 0; 2428 a[7] = 0xFF; 2429 } 2430 2431 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2432 { 2433 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2434 { 2435 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24; 2436 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha; 2437 2438 dest[(x + i) + (y + j) * internal.pitchP] = color; 2439 } 2440 } 2441 2442 source++; 2443 } 2444 } 2445 2446 (byte*&)destSlice += internal.sliceB; 2447 } 2448 2449 external.unlockRect(); 2450 internal.unlockRect(); 2451 } 2452 decodeATI1(Buffer & internal,Buffer & external)2453 void Surface::decodeATI1(Buffer &internal, Buffer &external) 2454 { 2455 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2456 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY); 2457 2458 for(int z = 0; z < external.depth; z++) 2459 { 2460 byte *dest = destSlice; 2461 2462 for(int y = 0; y < external.height; y += 4) 2463 { 2464 for(int x = 0; x < external.width; x += 4) 2465 { 2466 byte r[8]; 2467 2468 r[0] = source->r0; 2469 r[1] = source->r1; 2470 2471 if(r[0] > r[1]) 2472 { 2473 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7); 2474 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7); 2475 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7); 2476 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7); 2477 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7); 2478 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7); 2479 } 2480 else 2481 { 2482 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5); 2483 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5); 2484 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5); 2485 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5); 2486 r[6] = 0; 2487 r[7] = 0xFF; 2488 } 2489 2490 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2491 { 2492 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2493 { 2494 dest[(x + i) + (y + j) * internal.pitchP] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8]; 2495 } 2496 } 2497 2498 source++; 2499 } 2500 } 2501 2502 destSlice += internal.sliceB; 2503 } 2504 2505 external.unlockRect(); 2506 internal.unlockRect(); 2507 } 2508 decodeATI2(Buffer & internal,Buffer & 
external)2509 void Surface::decodeATI2(Buffer &internal, Buffer &external) 2510 { 2511 word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2512 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY); 2513 2514 for(int z = 0; z < external.depth; z++) 2515 { 2516 word *dest = destSlice; 2517 2518 for(int y = 0; y < external.height; y += 4) 2519 { 2520 for(int x = 0; x < external.width; x += 4) 2521 { 2522 byte X[8]; 2523 2524 X[0] = source->x0; 2525 X[1] = source->x1; 2526 2527 if(X[0] > X[1]) 2528 { 2529 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7); 2530 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7); 2531 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7); 2532 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7); 2533 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7); 2534 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7); 2535 } 2536 else 2537 { 2538 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5); 2539 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5); 2540 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5); 2541 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5); 2542 X[6] = 0; 2543 X[7] = 0xFF; 2544 } 2545 2546 byte Y[8]; 2547 2548 Y[0] = source->y0; 2549 Y[1] = source->y1; 2550 2551 if(Y[0] > Y[1]) 2552 { 2553 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7); 2554 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7); 2555 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7); 2556 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7); 2557 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7); 2558 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7); 2559 } 2560 else 2561 { 2562 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5); 2563 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5); 2564 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5); 2565 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5); 2566 Y[6] = 0; 2567 Y[7] = 0xFF; 2568 } 2569 2570 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2571 { 2572 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2573 { 2574 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8]; 2575 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8]; 2576 2577 dest[(x + i) + (y + j) * internal.pitchP] = (g << 8) + r; 2578 } 2579 } 2580 2581 source++; 2582 } 2583 } 2584 2585 (byte*&)destSlice += internal.sliceB; 2586 } 2587 2588 external.unlockRect(); 2589 internal.unlockRect(); 2590 } 2591 decodeETC2(Buffer & internal,Buffer & external,int nbAlphaBits,bool isSRGB)2592 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB) 2593 { 2594 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2595 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? 
		                    ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
		external.unlockRect();
		internal.unlockRect();

		if(isSRGB)
		{
			// Lazily build a 256-entry sRGB-to-linear lookup table on first use.
			static byte sRGBtoLinearTable[256];
			static bool sRGBtoLinearTableDirty = true;
			if(sRGBtoLinearTableDirty)
			{
				for(int i = 0; i < 256; i++)
				{
					sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
				}
				sRGBtoLinearTableDirty = false;
			}

			// Perform sRGB conversion in place after decoding
			byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
			for(int y = 0; y < internal.height; y++)
			{
				byte *srcRow = src + y * internal.pitchB;
				for(int x = 0; x < internal.width; x++)
				{
					byte *srcPix = srcRow + x * internal.bytes;
					for(int i = 0; i < 3; i++)
					{
						srcPix[i] = sRGBtoLinearTable[srcPix[i]];
					}
				}
			}
			internal.unlockRect();
		}
	}

	void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
	{
		ASSERT(nbChannels == 1 || nbChannels == 2);

		byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
		ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
		                    (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
		external.unlockRect();

		// FIXME: We convert EAC data to float, until signed short internal formats are supported
		//        This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats
		// 8 * 255.875 = 2047 and 8 * 127.875 = 1023, matching the maximum magnitude of the decoded
		// 11-bit values, so the result below lands in [0, 1] (unsigned) or [-1, 1] (signed).
		const float normalization = isSigned ? (1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f));
		for(int y = 0; y < internal.height; y++)
		{
			byte* srcRow = src + y * internal.pitchB;
			for(int x = internal.width - 1; x >= 0; x--)
			{
				int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes);
				float* dstPix = reinterpret_cast<float*>(srcPix);
				for(int c = nbChannels - 1; c >= 0; c--)
				{
					dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
				}
			}
		}

		internal.unlockRect();
	}

	void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
	{
		// ASTC decompression is not implemented yet; the destination is left untouched.
	}

	size_t Surface::size(int width, int height, int depth, int border, int samples, Format format)
	{
		samples = max(1, samples);

		switch(format)
		{
		default:
		{
			uint64_t size = (uint64_t)sliceB(width, height, border, format, true) * depth * samples;

			// FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
			// and stencil operations also read 8 bytes per four 8-bit stencil values,
			// so we have to allocate 4 extra bytes to avoid buffer overruns.
			size += 4;

			// We can only sample buffers smaller than 2 GiB.
			// Force an out-of-memory if larger, or let the caller report an error.
			return size < 0x80000000u ?
(size_t)size : std::numeric_limits<size_t>::max(); 2681 } 2682 case FORMAT_YV12_BT601: 2683 case FORMAT_YV12_BT709: 2684 case FORMAT_YV12_JFIF: 2685 { 2686 width += 2 * border; 2687 height += 2 * border; 2688 2689 size_t YStride = align<16>(width); 2690 size_t YSize = YStride * height; 2691 size_t CStride = align<16>(YStride / 2); 2692 size_t CSize = CStride * height / 2; 2693 2694 return YSize + 2 * CSize; 2695 } 2696 } 2697 } 2698 isStencil(Format format)2699 bool Surface::isStencil(Format format) 2700 { 2701 switch(format) 2702 { 2703 case FORMAT_D32: 2704 case FORMAT_D16: 2705 case FORMAT_D24X8: 2706 case FORMAT_D32F: 2707 case FORMAT_D32F_COMPLEMENTARY: 2708 case FORMAT_D32F_LOCKABLE: 2709 case FORMAT_D32F_SHADOW: 2710 return false; 2711 case FORMAT_D24S8: 2712 case FORMAT_D24FS8: 2713 case FORMAT_S8: 2714 case FORMAT_DF24S8: 2715 case FORMAT_DF16S8: 2716 case FORMAT_D32FS8_TEXTURE: 2717 case FORMAT_D32FS8_SHADOW: 2718 case FORMAT_D32FS8: 2719 case FORMAT_D32FS8_COMPLEMENTARY: 2720 case FORMAT_INTZ: 2721 return true; 2722 default: 2723 return false; 2724 } 2725 } 2726 isDepth(Format format)2727 bool Surface::isDepth(Format format) 2728 { 2729 switch(format) 2730 { 2731 case FORMAT_D32: 2732 case FORMAT_D16: 2733 case FORMAT_D24X8: 2734 case FORMAT_D24S8: 2735 case FORMAT_D24FS8: 2736 case FORMAT_D32F: 2737 case FORMAT_D32FS8: 2738 case FORMAT_D32F_COMPLEMENTARY: 2739 case FORMAT_D32FS8_COMPLEMENTARY: 2740 case FORMAT_D32F_LOCKABLE: 2741 case FORMAT_DF24S8: 2742 case FORMAT_DF16S8: 2743 case FORMAT_D32FS8_TEXTURE: 2744 case FORMAT_D32F_SHADOW: 2745 case FORMAT_D32FS8_SHADOW: 2746 case FORMAT_INTZ: 2747 return true; 2748 case FORMAT_S8: 2749 return false; 2750 default: 2751 return false; 2752 } 2753 } 2754 hasQuadLayout(Format format)2755 bool Surface::hasQuadLayout(Format format) 2756 { 2757 switch(format) 2758 { 2759 case FORMAT_D32: 2760 case FORMAT_D16: 2761 case FORMAT_D24X8: 2762 case FORMAT_D24S8: 2763 case FORMAT_D24FS8: 2764 case FORMAT_D32F: 2765 case FORMAT_D32FS8: 2766 case FORMAT_D32F_COMPLEMENTARY: 2767 case FORMAT_D32FS8_COMPLEMENTARY: 2768 case FORMAT_DF24S8: 2769 case FORMAT_DF16S8: 2770 case FORMAT_INTZ: 2771 case FORMAT_S8: 2772 case FORMAT_A8G8R8B8Q: 2773 case FORMAT_X8G8R8B8Q: 2774 return true; 2775 case FORMAT_D32F_LOCKABLE: 2776 case FORMAT_D32FS8_TEXTURE: 2777 case FORMAT_D32F_SHADOW: 2778 case FORMAT_D32FS8_SHADOW: 2779 default: 2780 break; 2781 } 2782 2783 return false; 2784 } 2785 isPalette(Format format)2786 bool Surface::isPalette(Format format) 2787 { 2788 switch(format) 2789 { 2790 case FORMAT_P8: 2791 case FORMAT_A8P8: 2792 return true; 2793 default: 2794 return false; 2795 } 2796 } 2797 isFloatFormat(Format format)2798 bool Surface::isFloatFormat(Format format) 2799 { 2800 switch(format) 2801 { 2802 case FORMAT_R5G6B5: 2803 case FORMAT_R8G8B8: 2804 case FORMAT_B8G8R8: 2805 case FORMAT_X8R8G8B8: 2806 case FORMAT_X8B8G8R8I: 2807 case FORMAT_X8B8G8R8: 2808 case FORMAT_A8R8G8B8: 2809 case FORMAT_SRGB8_X8: 2810 case FORMAT_SRGB8_A8: 2811 case FORMAT_A8B8G8R8I: 2812 case FORMAT_R8UI: 2813 case FORMAT_G8R8UI: 2814 case FORMAT_X8B8G8R8UI: 2815 case FORMAT_A8B8G8R8UI: 2816 case FORMAT_A8B8G8R8: 2817 case FORMAT_G8R8I: 2818 case FORMAT_G8R8: 2819 case FORMAT_A2B10G10R10: 2820 case FORMAT_A2B10G10R10UI: 2821 case FORMAT_R8_SNORM: 2822 case FORMAT_G8R8_SNORM: 2823 case FORMAT_X8B8G8R8_SNORM: 2824 case FORMAT_A8B8G8R8_SNORM: 2825 case FORMAT_R16I: 2826 case FORMAT_R16UI: 2827 case FORMAT_G16R16I: 2828 case FORMAT_G16R16UI: 2829 case FORMAT_G16R16: 2830 case 
FORMAT_X16B16G16R16I: 2831 case FORMAT_X16B16G16R16UI: 2832 case FORMAT_A16B16G16R16I: 2833 case FORMAT_A16B16G16R16UI: 2834 case FORMAT_A16B16G16R16: 2835 case FORMAT_V8U8: 2836 case FORMAT_Q8W8V8U8: 2837 case FORMAT_X8L8V8U8: 2838 case FORMAT_V16U16: 2839 case FORMAT_A16W16V16U16: 2840 case FORMAT_Q16W16V16U16: 2841 case FORMAT_A8: 2842 case FORMAT_R8I: 2843 case FORMAT_R8: 2844 case FORMAT_S8: 2845 case FORMAT_L8: 2846 case FORMAT_L16: 2847 case FORMAT_A8L8: 2848 case FORMAT_YV12_BT601: 2849 case FORMAT_YV12_BT709: 2850 case FORMAT_YV12_JFIF: 2851 case FORMAT_R32I: 2852 case FORMAT_R32UI: 2853 case FORMAT_G32R32I: 2854 case FORMAT_G32R32UI: 2855 case FORMAT_X32B32G32R32I: 2856 case FORMAT_X32B32G32R32UI: 2857 case FORMAT_A32B32G32R32I: 2858 case FORMAT_A32B32G32R32UI: 2859 return false; 2860 case FORMAT_R16F: 2861 case FORMAT_G16R16F: 2862 case FORMAT_B16G16R16F: 2863 case FORMAT_X16B16G16R16F: 2864 case FORMAT_A16B16G16R16F: 2865 case FORMAT_X16B16G16R16F_UNSIGNED: 2866 case FORMAT_R32F: 2867 case FORMAT_G32R32F: 2868 case FORMAT_B32G32R32F: 2869 case FORMAT_X32B32G32R32F: 2870 case FORMAT_A32B32G32R32F: 2871 case FORMAT_X32B32G32R32F_UNSIGNED: 2872 case FORMAT_D32F: 2873 case FORMAT_D32FS8: 2874 case FORMAT_D32F_COMPLEMENTARY: 2875 case FORMAT_D32FS8_COMPLEMENTARY: 2876 case FORMAT_D32F_LOCKABLE: 2877 case FORMAT_D32FS8_TEXTURE: 2878 case FORMAT_D32F_SHADOW: 2879 case FORMAT_D32FS8_SHADOW: 2880 case FORMAT_L16F: 2881 case FORMAT_A16L16F: 2882 case FORMAT_L32F: 2883 case FORMAT_A32L32F: 2884 return true; 2885 default: 2886 ASSERT(false); 2887 } 2888 2889 return false; 2890 } 2891 isUnsignedComponent(Format format,int component)2892 bool Surface::isUnsignedComponent(Format format, int component) 2893 { 2894 switch(format) 2895 { 2896 case FORMAT_NULL: 2897 case FORMAT_R5G6B5: 2898 case FORMAT_R8G8B8: 2899 case FORMAT_B8G8R8: 2900 case FORMAT_X8R8G8B8: 2901 case FORMAT_X8B8G8R8: 2902 case FORMAT_A8R8G8B8: 2903 case FORMAT_A8B8G8R8: 2904 case FORMAT_SRGB8_X8: 2905 case FORMAT_SRGB8_A8: 2906 case FORMAT_G8R8: 2907 case FORMAT_A2B10G10R10: 2908 case FORMAT_A2B10G10R10UI: 2909 case FORMAT_R16UI: 2910 case FORMAT_G16R16: 2911 case FORMAT_G16R16UI: 2912 case FORMAT_X16B16G16R16UI: 2913 case FORMAT_A16B16G16R16: 2914 case FORMAT_A16B16G16R16UI: 2915 case FORMAT_R32UI: 2916 case FORMAT_G32R32UI: 2917 case FORMAT_X32B32G32R32UI: 2918 case FORMAT_A32B32G32R32UI: 2919 case FORMAT_X32B32G32R32F_UNSIGNED: 2920 case FORMAT_R8UI: 2921 case FORMAT_G8R8UI: 2922 case FORMAT_X8B8G8R8UI: 2923 case FORMAT_A8B8G8R8UI: 2924 case FORMAT_D32F: 2925 case FORMAT_D32FS8: 2926 case FORMAT_D32F_COMPLEMENTARY: 2927 case FORMAT_D32FS8_COMPLEMENTARY: 2928 case FORMAT_D32F_LOCKABLE: 2929 case FORMAT_D32FS8_TEXTURE: 2930 case FORMAT_D32F_SHADOW: 2931 case FORMAT_D32FS8_SHADOW: 2932 case FORMAT_A8: 2933 case FORMAT_R8: 2934 case FORMAT_L8: 2935 case FORMAT_L16: 2936 case FORMAT_A8L8: 2937 case FORMAT_YV12_BT601: 2938 case FORMAT_YV12_BT709: 2939 case FORMAT_YV12_JFIF: 2940 return true; 2941 case FORMAT_A8B8G8R8I: 2942 case FORMAT_A16B16G16R16I: 2943 case FORMAT_A32B32G32R32I: 2944 case FORMAT_A8B8G8R8_SNORM: 2945 case FORMAT_Q8W8V8U8: 2946 case FORMAT_Q16W16V16U16: 2947 case FORMAT_A32B32G32R32F: 2948 return false; 2949 case FORMAT_R32F: 2950 case FORMAT_R8I: 2951 case FORMAT_R16I: 2952 case FORMAT_R32I: 2953 case FORMAT_R8_SNORM: 2954 return component >= 1; 2955 case FORMAT_V8U8: 2956 case FORMAT_X8L8V8U8: 2957 case FORMAT_V16U16: 2958 case FORMAT_G32R32F: 2959 case FORMAT_G8R8I: 2960 case FORMAT_G16R16I: 2961 case 
FORMAT_G32R32I: 2962 case FORMAT_G8R8_SNORM: 2963 return component >= 2; 2964 case FORMAT_A16W16V16U16: 2965 case FORMAT_B32G32R32F: 2966 case FORMAT_X32B32G32R32F: 2967 case FORMAT_X8B8G8R8I: 2968 case FORMAT_X16B16G16R16I: 2969 case FORMAT_X32B32G32R32I: 2970 case FORMAT_X8B8G8R8_SNORM: 2971 return component >= 3; 2972 default: 2973 ASSERT(false); 2974 } 2975 2976 return false; 2977 } 2978 isSRGBreadable(Format format)2979 bool Surface::isSRGBreadable(Format format) 2980 { 2981 // Keep in sync with Capabilities::isSRGBreadable 2982 switch(format) 2983 { 2984 case FORMAT_L8: 2985 case FORMAT_A8L8: 2986 case FORMAT_R8G8B8: 2987 case FORMAT_A8R8G8B8: 2988 case FORMAT_X8R8G8B8: 2989 case FORMAT_A8B8G8R8: 2990 case FORMAT_X8B8G8R8: 2991 case FORMAT_SRGB8_X8: 2992 case FORMAT_SRGB8_A8: 2993 case FORMAT_R5G6B5: 2994 case FORMAT_X1R5G5B5: 2995 case FORMAT_A1R5G5B5: 2996 case FORMAT_A4R4G4B4: 2997 case FORMAT_DXT1: 2998 case FORMAT_DXT3: 2999 case FORMAT_DXT5: 3000 case FORMAT_ATI1: 3001 case FORMAT_ATI2: 3002 return true; 3003 default: 3004 return false; 3005 } 3006 } 3007 isSRGBwritable(Format format)3008 bool Surface::isSRGBwritable(Format format) 3009 { 3010 // Keep in sync with Capabilities::isSRGBwritable 3011 switch(format) 3012 { 3013 case FORMAT_NULL: 3014 case FORMAT_A8R8G8B8: 3015 case FORMAT_X8R8G8B8: 3016 case FORMAT_A8B8G8R8: 3017 case FORMAT_X8B8G8R8: 3018 case FORMAT_SRGB8_X8: 3019 case FORMAT_SRGB8_A8: 3020 case FORMAT_R5G6B5: 3021 return true; 3022 default: 3023 return false; 3024 } 3025 } 3026 isSRGBformat(Format format)3027 bool Surface::isSRGBformat(Format format) 3028 { 3029 switch(format) 3030 { 3031 case FORMAT_SRGB8_X8: 3032 case FORMAT_SRGB8_A8: 3033 return true; 3034 default: 3035 return false; 3036 } 3037 } 3038 isCompressed(Format format)3039 bool Surface::isCompressed(Format format) 3040 { 3041 switch(format) 3042 { 3043 case FORMAT_DXT1: 3044 case FORMAT_DXT3: 3045 case FORMAT_DXT5: 3046 case FORMAT_ATI1: 3047 case FORMAT_ATI2: 3048 case FORMAT_ETC1: 3049 case FORMAT_R11_EAC: 3050 case FORMAT_SIGNED_R11_EAC: 3051 case FORMAT_RG11_EAC: 3052 case FORMAT_SIGNED_RG11_EAC: 3053 case FORMAT_RGB8_ETC2: 3054 case FORMAT_SRGB8_ETC2: 3055 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3056 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3057 case FORMAT_RGBA8_ETC2_EAC: 3058 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3059 case FORMAT_RGBA_ASTC_4x4_KHR: 3060 case FORMAT_RGBA_ASTC_5x4_KHR: 3061 case FORMAT_RGBA_ASTC_5x5_KHR: 3062 case FORMAT_RGBA_ASTC_6x5_KHR: 3063 case FORMAT_RGBA_ASTC_6x6_KHR: 3064 case FORMAT_RGBA_ASTC_8x5_KHR: 3065 case FORMAT_RGBA_ASTC_8x6_KHR: 3066 case FORMAT_RGBA_ASTC_8x8_KHR: 3067 case FORMAT_RGBA_ASTC_10x5_KHR: 3068 case FORMAT_RGBA_ASTC_10x6_KHR: 3069 case FORMAT_RGBA_ASTC_10x8_KHR: 3070 case FORMAT_RGBA_ASTC_10x10_KHR: 3071 case FORMAT_RGBA_ASTC_12x10_KHR: 3072 case FORMAT_RGBA_ASTC_12x12_KHR: 3073 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3074 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3075 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3076 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3077 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3078 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3079 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3080 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3081 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3082 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3083 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3084 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3085 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3086 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3087 return true; 3088 default: 3089 return false; 3090 } 
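		// These compressed formats are expected only on the external buffer; update() above decodes
		// them into the uncompressed internal format (except ASTC, whose decoder is still a stub).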
3091 } 3092 isSignedNonNormalizedInteger(Format format)3093 bool Surface::isSignedNonNormalizedInteger(Format format) 3094 { 3095 switch(format) 3096 { 3097 case FORMAT_A8B8G8R8I: 3098 case FORMAT_X8B8G8R8I: 3099 case FORMAT_G8R8I: 3100 case FORMAT_R8I: 3101 case FORMAT_A16B16G16R16I: 3102 case FORMAT_X16B16G16R16I: 3103 case FORMAT_G16R16I: 3104 case FORMAT_R16I: 3105 case FORMAT_A32B32G32R32I: 3106 case FORMAT_X32B32G32R32I: 3107 case FORMAT_G32R32I: 3108 case FORMAT_R32I: 3109 return true; 3110 default: 3111 return false; 3112 } 3113 } 3114 isUnsignedNonNormalizedInteger(Format format)3115 bool Surface::isUnsignedNonNormalizedInteger(Format format) 3116 { 3117 switch(format) 3118 { 3119 case FORMAT_A8B8G8R8UI: 3120 case FORMAT_X8B8G8R8UI: 3121 case FORMAT_G8R8UI: 3122 case FORMAT_R8UI: 3123 case FORMAT_A16B16G16R16UI: 3124 case FORMAT_X16B16G16R16UI: 3125 case FORMAT_G16R16UI: 3126 case FORMAT_R16UI: 3127 case FORMAT_A32B32G32R32UI: 3128 case FORMAT_X32B32G32R32UI: 3129 case FORMAT_G32R32UI: 3130 case FORMAT_R32UI: 3131 return true; 3132 default: 3133 return false; 3134 } 3135 } 3136 isNonNormalizedInteger(Format format)3137 bool Surface::isNonNormalizedInteger(Format format) 3138 { 3139 return isSignedNonNormalizedInteger(format) || 3140 isUnsignedNonNormalizedInteger(format); 3141 } 3142 isNormalizedInteger(Format format)3143 bool Surface::isNormalizedInteger(Format format) 3144 { 3145 return !isFloatFormat(format) && 3146 !isNonNormalizedInteger(format) && 3147 !isCompressed(format) && 3148 !isDepth(format) && 3149 !isStencil(format); 3150 } 3151 componentCount(Format format)3152 int Surface::componentCount(Format format) 3153 { 3154 switch(format) 3155 { 3156 case FORMAT_R5G6B5: return 3; 3157 case FORMAT_X8R8G8B8: return 3; 3158 case FORMAT_X8B8G8R8I: return 3; 3159 case FORMAT_X8B8G8R8: return 3; 3160 case FORMAT_A8R8G8B8: return 4; 3161 case FORMAT_SRGB8_X8: return 3; 3162 case FORMAT_SRGB8_A8: return 4; 3163 case FORMAT_A8B8G8R8I: return 4; 3164 case FORMAT_A8B8G8R8: return 4; 3165 case FORMAT_G8R8I: return 2; 3166 case FORMAT_G8R8: return 2; 3167 case FORMAT_R8_SNORM: return 1; 3168 case FORMAT_G8R8_SNORM: return 2; 3169 case FORMAT_X8B8G8R8_SNORM:return 3; 3170 case FORMAT_A8B8G8R8_SNORM:return 4; 3171 case FORMAT_R8UI: return 1; 3172 case FORMAT_G8R8UI: return 2; 3173 case FORMAT_X8B8G8R8UI: return 3; 3174 case FORMAT_A8B8G8R8UI: return 4; 3175 case FORMAT_A2B10G10R10: return 4; 3176 case FORMAT_A2B10G10R10UI: return 4; 3177 case FORMAT_G16R16I: return 2; 3178 case FORMAT_G16R16UI: return 2; 3179 case FORMAT_G16R16: return 2; 3180 case FORMAT_G32R32I: return 2; 3181 case FORMAT_G32R32UI: return 2; 3182 case FORMAT_X16B16G16R16I: return 3; 3183 case FORMAT_X16B16G16R16UI: return 3; 3184 case FORMAT_A16B16G16R16I: return 4; 3185 case FORMAT_A16B16G16R16UI: return 4; 3186 case FORMAT_A16B16G16R16: return 4; 3187 case FORMAT_X32B32G32R32I: return 3; 3188 case FORMAT_X32B32G32R32UI: return 3; 3189 case FORMAT_A32B32G32R32I: return 4; 3190 case FORMAT_A32B32G32R32UI: return 4; 3191 case FORMAT_V8U8: return 2; 3192 case FORMAT_Q8W8V8U8: return 4; 3193 case FORMAT_X8L8V8U8: return 3; 3194 case FORMAT_V16U16: return 2; 3195 case FORMAT_A16W16V16U16: return 4; 3196 case FORMAT_Q16W16V16U16: return 4; 3197 case FORMAT_R32F: return 1; 3198 case FORMAT_G32R32F: return 2; 3199 case FORMAT_X32B32G32R32F: return 3; 3200 case FORMAT_A32B32G32R32F: return 4; 3201 case FORMAT_X32B32G32R32F_UNSIGNED: return 3; 3202 case FORMAT_D32F: return 1; 3203 case FORMAT_D32FS8: return 1; 3204 case 
FORMAT_D32F_LOCKABLE: return 1; 3205 case FORMAT_D32FS8_TEXTURE: return 1; 3206 case FORMAT_D32F_SHADOW: return 1; 3207 case FORMAT_D32FS8_SHADOW: return 1; 3208 case FORMAT_A8: return 1; 3209 case FORMAT_R8I: return 1; 3210 case FORMAT_R8: return 1; 3211 case FORMAT_R16I: return 1; 3212 case FORMAT_R16UI: return 1; 3213 case FORMAT_R32I: return 1; 3214 case FORMAT_R32UI: return 1; 3215 case FORMAT_L8: return 1; 3216 case FORMAT_L16: return 1; 3217 case FORMAT_A8L8: return 2; 3218 case FORMAT_YV12_BT601: return 3; 3219 case FORMAT_YV12_BT709: return 3; 3220 case FORMAT_YV12_JFIF: return 3; 3221 default: 3222 ASSERT(false); 3223 } 3224 3225 return 1; 3226 } 3227 allocateBuffer(int width,int height,int depth,int border,int samples,Format format)3228 void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format) 3229 { 3230 return allocate(size(width, height, depth, border, samples, format)); 3231 } 3232 memfill4(void * buffer,int pattern,int bytes)3233 void Surface::memfill4(void *buffer, int pattern, int bytes) 3234 { 3235 while((size_t)buffer & 0x1 && bytes >= 1) 3236 { 3237 *(char*)buffer = (char)pattern; 3238 (char*&)buffer += 1; 3239 bytes -= 1; 3240 } 3241 3242 while((size_t)buffer & 0x3 && bytes >= 2) 3243 { 3244 *(short*)buffer = (short)pattern; 3245 (short*&)buffer += 1; 3246 bytes -= 2; 3247 } 3248 3249 #if defined(__i386__) || defined(__x86_64__) 3250 if(CPUID::supportsSSE()) 3251 { 3252 while((size_t)buffer & 0xF && bytes >= 4) 3253 { 3254 *(int*)buffer = pattern; 3255 (int*&)buffer += 1; 3256 bytes -= 4; 3257 } 3258 3259 __m128 quad = _mm_set_ps1((float&)pattern); 3260 3261 float *pointer = (float*)buffer; 3262 int qxwords = bytes / 64; 3263 bytes -= qxwords * 64; 3264 3265 while(qxwords--) 3266 { 3267 _mm_stream_ps(pointer + 0, quad); 3268 _mm_stream_ps(pointer + 4, quad); 3269 _mm_stream_ps(pointer + 8, quad); 3270 _mm_stream_ps(pointer + 12, quad); 3271 3272 pointer += 16; 3273 } 3274 3275 buffer = pointer; 3276 } 3277 #endif 3278 3279 while(bytes >= 4) 3280 { 3281 *(int*)buffer = (int)pattern; 3282 (int*&)buffer += 1; 3283 bytes -= 4; 3284 } 3285 3286 while(bytes >= 2) 3287 { 3288 *(short*)buffer = (short)pattern; 3289 (short*&)buffer += 1; 3290 bytes -= 2; 3291 } 3292 3293 while(bytes >= 1) 3294 { 3295 *(char*)buffer = (char)pattern; 3296 (char*&)buffer += 1; 3297 bytes -= 1; 3298 } 3299 } 3300 sync()3301 void Surface::sync() 3302 { 3303 resource->lock(EXCLUSIVE); 3304 resource->unlock(); 3305 } 3306 isEntire(const Rect & rect) const3307 bool Surface::isEntire(const Rect& rect) const 3308 { 3309 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1); 3310 } 3311 getRect() const3312 Rect Surface::getRect() const 3313 { 3314 return Rect(0, 0, internal.width, internal.height); 3315 } 3316 clearDepth(float depth,int x0,int y0,int width,int height)3317 void Surface::clearDepth(float depth, int x0, int y0, int width, int height) 3318 { 3319 if(width == 0 || height == 0) 3320 { 3321 return; 3322 } 3323 3324 if(internal.format == FORMAT_NULL) 3325 { 3326 return; 3327 } 3328 3329 // Not overlapping 3330 if(x0 > internal.width) return; 3331 if(y0 > internal.height) return; 3332 if(x0 + width < 0) return; 3333 if(y0 + height < 0) return; 3334 3335 // Clip against dimensions 3336 if(x0 < 0) {width += x0; x0 = 0;} 3337 if(x0 + width > internal.width) width = internal.width - x0; 3338 if(y0 < 0) {height += y0; y0 = 0;} 3339 if(y0 + height > internal.height) height = internal.height - 
y0; 3340 3341 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 3342 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY; 3343 3344 int x1 = x0 + width; 3345 int y1 = y0 + height; 3346 3347 if(!hasQuadLayout(internal.format)) 3348 { 3349 float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC); 3350 3351 for(int z = 0; z < internal.samples; z++) 3352 { 3353 float *row = target; 3354 for(int y = y0; y < y1; y++) 3355 { 3356 memfill4(row, (int&)depth, width * sizeof(float)); 3357 row += internal.pitchP; 3358 } 3359 target += internal.sliceP; 3360 } 3361 3362 unlockInternal(); 3363 } 3364 else // Quad layout 3365 { 3366 if(complementaryDepthBuffer) 3367 { 3368 depth = 1 - depth; 3369 } 3370 3371 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC); 3372 3373 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3374 int oddX1 = (x1 & ~1) * 2; 3375 int evenX0 = ((x0 + 1) & ~1) * 2; 3376 int evenBytes = (oddX1 - evenX0) * sizeof(float); 3377 3378 for(int z = 0; z < internal.samples; z++) 3379 { 3380 for(int y = y0; y < y1; y++) 3381 { 3382 float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2; 3383 3384 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 3385 { 3386 if((x0 & 1) != 0) 3387 { 3388 target[oddX0 + 0] = depth; 3389 target[oddX0 + 2] = depth; 3390 } 3391 3392 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4) 3393 // { 3394 // target[x2 + 0] = depth; 3395 // target[x2 + 1] = depth; 3396 // target[x2 + 2] = depth; 3397 // target[x2 + 3] = depth; 3398 // } 3399 3400 // __asm 3401 // { 3402 // movss xmm0, depth 3403 // shufps xmm0, xmm0, 0x00 3404 // 3405 // mov eax, x0 3406 // add eax, 1 3407 // and eax, 0xFFFFFFFE 3408 // cmp eax, x1 3409 // jge qEnd 3410 // 3411 // mov edi, target 3412 // 3413 // qLoop: 3414 // movntps [edi+8*eax], xmm0 3415 // 3416 // add eax, 2 3417 // cmp eax, x1 3418 // jl qLoop 3419 // qEnd: 3420 // } 3421 3422 memfill4(&target[evenX0], (int&)depth, evenBytes); 3423 3424 if((x1 & 1) != 0) 3425 { 3426 target[oddX1 + 0] = depth; 3427 target[oddX1 + 2] = depth; 3428 } 3429 3430 y++; 3431 } 3432 else 3433 { 3434 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3435 { 3436 target[i] = depth; 3437 } 3438 } 3439 } 3440 3441 buffer += internal.sliceP; 3442 } 3443 3444 unlockInternal(); 3445 } 3446 } 3447 clearStencil(unsigned char s,unsigned char mask,int x0,int y0,int width,int height)3448 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3449 { 3450 if(mask == 0 || width == 0 || height == 0) 3451 { 3452 return; 3453 } 3454 3455 if(stencil.format == FORMAT_NULL) 3456 { 3457 return; 3458 } 3459 3460 // Not overlapping 3461 if(x0 > internal.width) return; 3462 if(y0 > internal.height) return; 3463 if(x0 + width < 0) return; 3464 if(y0 + height < 0) return; 3465 3466 // Clip against dimensions 3467 if(x0 < 0) {width += x0; x0 = 0;} 3468 if(x0 + width > internal.width) width = internal.width - x0; 3469 if(y0 < 0) {height += y0; y0 = 0;} 3470 if(y0 + height > internal.height) height = internal.height - y0; 3471 3472 int x1 = x0 + width; 3473 int y1 = y0 + height; 3474 3475 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3476 int oddX1 = (x1 & ~1) * 2; 3477 int evenX0 = ((x0 + 1) & ~1) * 2; 3478 int evenBytes = oddX1 - evenX0; 3479 3480 unsigned char maskedS = s & mask; 3481 unsigned char invMask = ~mask; 3482 unsigned int fill = maskedS; 3483 fill = fill | (fill << 8) | (fill << 16) | (fill << 24); 3484 3485 char *buffer = (char*)lockStencil(0, 0, 0, 
PUBLIC); 3486 3487 // Stencil buffers are assumed to use quad layout 3488 for(int z = 0; z < stencil.samples; z++) 3489 { 3490 for(int y = y0; y < y1; y++) 3491 { 3492 char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2; 3493 3494 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once 3495 { 3496 if((x0 & 1) != 0) 3497 { 3498 target[oddX0 + 0] = fill; 3499 target[oddX0 + 2] = fill; 3500 } 3501 3502 memfill4(&target[evenX0], fill, evenBytes); 3503 3504 if((x1 & 1) != 0) 3505 { 3506 target[oddX1 + 0] = fill; 3507 target[oddX1 + 2] = fill; 3508 } 3509 3510 y++; 3511 } 3512 else 3513 { 3514 for(int x = x0; x < x1; x++) 3515 { 3516 int i = (x & ~1) * 2 + (x & 1); 3517 target[i] = maskedS | (target[i] & invMask); 3518 } 3519 } 3520 } 3521 3522 buffer += stencil.sliceP; 3523 } 3524 3525 unlockStencil(); 3526 } 3527 fill(const Color<float> & color,int x0,int y0,int width,int height)3528 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height) 3529 { 3530 unsigned char *row; 3531 Buffer *buffer; 3532 3533 if(internal.dirty) 3534 { 3535 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3536 buffer = &internal; 3537 } 3538 else 3539 { 3540 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3541 buffer = &external; 3542 } 3543 3544 if(buffer->bytes <= 4) 3545 { 3546 int c; 3547 buffer->write(&c, color); 3548 3549 if(buffer->bytes <= 1) c = (c << 8) | c; 3550 if(buffer->bytes <= 2) c = (c << 16) | c; 3551 3552 for(int y = 0; y < height; y++) 3553 { 3554 memfill4(row, c, width * buffer->bytes); 3555 3556 row += buffer->pitchB; 3557 } 3558 } 3559 else // Generic 3560 { 3561 for(int y = 0; y < height; y++) 3562 { 3563 unsigned char *element = row; 3564 3565 for(int x = 0; x < width; x++) 3566 { 3567 buffer->write(element, color); 3568 3569 element += buffer->bytes; 3570 } 3571 3572 row += buffer->pitchB; 3573 } 3574 } 3575 3576 if(buffer == &internal) 3577 { 3578 unlockInternal(); 3579 } 3580 else 3581 { 3582 unlockExternal(); 3583 } 3584 } 3585 copyInternal(const Surface * source,int x,int y,float srcX,float srcY,bool filter)3586 void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter) 3587 { 3588 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3589 3590 sw::Color<float> color; 3591 3592 if(!filter) 3593 { 3594 color = source->internal.read((int)srcX, (int)srcY, 0); 3595 } 3596 else // Bilinear filtering 3597 { 3598 color = source->internal.sample(srcX, srcY, 0); 3599 } 3600 3601 internal.write(x, y, color); 3602 } 3603 copyInternal(const Surface * source,int x,int y,int z,float srcX,float srcY,float srcZ,bool filter)3604 void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter) 3605 { 3606 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3607 3608 sw::Color<float> color; 3609 3610 if(!filter) 3611 { 3612 color = source->internal.read((int)srcX, (int)srcY, int(srcZ)); 3613 } 3614 else // Bilinear filtering 3615 { 3616 color = source->internal.sample(srcX, srcY, srcZ); 3617 } 3618 3619 internal.write(x, y, z, color); 3620 } 3621 copyCubeEdge(Edge dstEdge,Surface * src,Edge srcEdge)3622 void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge) 3623 { 3624 Surface *dst = this; 3625 3626 // Figure out if the edges to be copied in reverse order respectively from one another 3627 // The copy should be reversed 
whenever the same edges are contiguous or if we're
		// copying top <-> right or bottom <-> left. This is explained by the layout, which is:
		//
		//      | +y |
		// | -x | +z | +x | -z |
		//      | -y |

		bool reverse = (srcEdge == dstEdge) ||
		               ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
		               ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
		               ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
		               ((srcEdge == LEFT) && (dstEdge == BOTTOM));

		int srcBytes = src->bytes(src->Surface::getInternalFormat());
		int srcPitch = src->getInternalPitchB();
		int dstBytes = dst->bytes(dst->Surface::getInternalFormat());
		int dstPitch = dst->getInternalPitchB();

		int srcW = src->getWidth();
		int srcH = src->getHeight();
		int dstW = dst->getWidth();
		int dstH = dst->getHeight();

		ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes);

		// Src is expressed in the regular [0, width-1], [0, height-1] space
		int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch;
		int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0));

		// Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space
		int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1);
		int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta);

		char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart;
		char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart;

		for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta)
		{
			memcpy(dstBuf, srcBuf, srcBytes);
		}

		if(dstEdge == LEFT || dstEdge == RIGHT)
		{
			// TOP and BOTTOM are already set, let's average out the corners
			int x0 = (dstEdge == RIGHT) ? dstW : -1;
			int y0 = -1;
			int x1 = (dstEdge == RIGHT) ?
dstW - 1 : 0; 3674 int y1 = 0; 3675 dst->computeCubeCorner(x0, y0, x1, y1); 3676 y0 = dstH; 3677 y1 = dstH - 1; 3678 dst->computeCubeCorner(x0, y0, x1, y1); 3679 } 3680 3681 src->unlockInternal(); 3682 dst->unlockInternal(); 3683 } 3684 computeCubeCorner(int x0,int y0,int x1,int y1)3685 void Surface::computeCubeCorner(int x0, int y0, int x1, int y1) 3686 { 3687 ASSERT(internal.lock != LOCK_UNLOCKED); 3688 3689 sw::Color<float> color = internal.read(x0, y1); 3690 color += internal.read(x1, y0); 3691 color += internal.read(x1, y1); 3692 color *= (1.0f / 3.0f); 3693 3694 internal.write(x0, y0, color); 3695 } 3696 hasStencil() const3697 bool Surface::hasStencil() const 3698 { 3699 return isStencil(external.format); 3700 } 3701 hasDepth() const3702 bool Surface::hasDepth() const 3703 { 3704 return isDepth(external.format); 3705 } 3706 hasPalette() const3707 bool Surface::hasPalette() const 3708 { 3709 return isPalette(external.format); 3710 } 3711 isRenderTarget() const3712 bool Surface::isRenderTarget() const 3713 { 3714 return renderTarget; 3715 } 3716 hasDirtyContents() const3717 bool Surface::hasDirtyContents() const 3718 { 3719 return dirtyContents; 3720 } 3721 markContentsClean()3722 void Surface::markContentsClean() 3723 { 3724 dirtyContents = false; 3725 } 3726 getResource()3727 Resource *Surface::getResource() 3728 { 3729 return resource; 3730 } 3731 identicalBuffers() const3732 bool Surface::identicalBuffers() const 3733 { 3734 return external.format == internal.format && 3735 external.width == internal.width && 3736 external.height == internal.height && 3737 external.depth == internal.depth && 3738 external.pitchB == internal.pitchB && 3739 external.sliceB == internal.sliceB && 3740 external.border == internal.border && 3741 external.samples == internal.samples; 3742 } 3743 selectInternalFormat(Format format) const3744 Format Surface::selectInternalFormat(Format format) const 3745 { 3746 switch(format) 3747 { 3748 case FORMAT_NULL: 3749 return FORMAT_NULL; 3750 case FORMAT_P8: 3751 case FORMAT_A8P8: 3752 case FORMAT_A4R4G4B4: 3753 case FORMAT_A1R5G5B5: 3754 case FORMAT_A8R3G3B2: 3755 return FORMAT_A8R8G8B8; 3756 case FORMAT_A8: 3757 return FORMAT_A8; 3758 case FORMAT_R8I: 3759 return FORMAT_R8I; 3760 case FORMAT_R8UI: 3761 return FORMAT_R8UI; 3762 case FORMAT_R8_SNORM: 3763 return FORMAT_R8_SNORM; 3764 case FORMAT_R8: 3765 return FORMAT_R8; 3766 case FORMAT_R16I: 3767 return FORMAT_R16I; 3768 case FORMAT_R16UI: 3769 return FORMAT_R16UI; 3770 case FORMAT_R32I: 3771 return FORMAT_R32I; 3772 case FORMAT_R32UI: 3773 return FORMAT_R32UI; 3774 case FORMAT_X16B16G16R16I: 3775 return FORMAT_X16B16G16R16I; 3776 case FORMAT_A16B16G16R16I: 3777 return FORMAT_A16B16G16R16I; 3778 case FORMAT_X16B16G16R16UI: 3779 return FORMAT_X16B16G16R16UI; 3780 case FORMAT_A16B16G16R16UI: 3781 return FORMAT_A16B16G16R16UI; 3782 case FORMAT_A2R10G10B10: 3783 case FORMAT_A2B10G10R10: 3784 case FORMAT_A16B16G16R16: 3785 return FORMAT_A16B16G16R16; 3786 case FORMAT_A2B10G10R10UI: 3787 return FORMAT_A16B16G16R16UI; 3788 case FORMAT_X32B32G32R32I: 3789 return FORMAT_X32B32G32R32I; 3790 case FORMAT_A32B32G32R32I: 3791 return FORMAT_A32B32G32R32I; 3792 case FORMAT_X32B32G32R32UI: 3793 return FORMAT_X32B32G32R32UI; 3794 case FORMAT_A32B32G32R32UI: 3795 return FORMAT_A32B32G32R32UI; 3796 case FORMAT_G8R8I: 3797 return FORMAT_G8R8I; 3798 case FORMAT_G8R8UI: 3799 return FORMAT_G8R8UI; 3800 case FORMAT_G8R8_SNORM: 3801 return FORMAT_G8R8_SNORM; 3802 case FORMAT_G8R8: 3803 return FORMAT_G8R8; 3804 case FORMAT_G16R16I: 3805 
return FORMAT_G16R16I; 3806 case FORMAT_G16R16UI: 3807 return FORMAT_G16R16UI; 3808 case FORMAT_G16R16: 3809 return FORMAT_G16R16; 3810 case FORMAT_G32R32I: 3811 return FORMAT_G32R32I; 3812 case FORMAT_G32R32UI: 3813 return FORMAT_G32R32UI; 3814 case FORMAT_A8R8G8B8: 3815 if(lockable || !quadLayoutEnabled) 3816 { 3817 return FORMAT_A8R8G8B8; 3818 } 3819 else 3820 { 3821 return FORMAT_A8G8R8B8Q; 3822 } 3823 case FORMAT_A8B8G8R8I: 3824 return FORMAT_A8B8G8R8I; 3825 case FORMAT_A8B8G8R8UI: 3826 return FORMAT_A8B8G8R8UI; 3827 case FORMAT_A8B8G8R8_SNORM: 3828 return FORMAT_A8B8G8R8_SNORM; 3829 case FORMAT_R5G5B5A1: 3830 case FORMAT_R4G4B4A4: 3831 case FORMAT_A8B8G8R8: 3832 return FORMAT_A8B8G8R8; 3833 case FORMAT_R5G6B5: 3834 return FORMAT_R5G6B5; 3835 case FORMAT_R3G3B2: 3836 case FORMAT_R8G8B8: 3837 case FORMAT_X4R4G4B4: 3838 case FORMAT_X1R5G5B5: 3839 case FORMAT_X8R8G8B8: 3840 if(lockable || !quadLayoutEnabled) 3841 { 3842 return FORMAT_X8R8G8B8; 3843 } 3844 else 3845 { 3846 return FORMAT_X8G8R8B8Q; 3847 } 3848 case FORMAT_X8B8G8R8I: 3849 return FORMAT_X8B8G8R8I; 3850 case FORMAT_X8B8G8R8UI: 3851 return FORMAT_X8B8G8R8UI; 3852 case FORMAT_X8B8G8R8_SNORM: 3853 return FORMAT_X8B8G8R8_SNORM; 3854 case FORMAT_B8G8R8: 3855 case FORMAT_X8B8G8R8: 3856 return FORMAT_X8B8G8R8; 3857 case FORMAT_SRGB8_X8: 3858 return FORMAT_SRGB8_X8; 3859 case FORMAT_SRGB8_A8: 3860 return FORMAT_SRGB8_A8; 3861 // Compressed formats 3862 case FORMAT_DXT1: 3863 case FORMAT_DXT3: 3864 case FORMAT_DXT5: 3865 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3866 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3867 case FORMAT_RGBA8_ETC2_EAC: 3868 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3869 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3870 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3871 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3872 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3873 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3874 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3875 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3876 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3877 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3878 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3879 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3880 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3881 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3882 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3883 return FORMAT_A8R8G8B8; 3884 case FORMAT_RGBA_ASTC_4x4_KHR: 3885 case FORMAT_RGBA_ASTC_5x4_KHR: 3886 case FORMAT_RGBA_ASTC_5x5_KHR: 3887 case FORMAT_RGBA_ASTC_6x5_KHR: 3888 case FORMAT_RGBA_ASTC_6x6_KHR: 3889 case FORMAT_RGBA_ASTC_8x5_KHR: 3890 case FORMAT_RGBA_ASTC_8x6_KHR: 3891 case FORMAT_RGBA_ASTC_8x8_KHR: 3892 case FORMAT_RGBA_ASTC_10x5_KHR: 3893 case FORMAT_RGBA_ASTC_10x6_KHR: 3894 case FORMAT_RGBA_ASTC_10x8_KHR: 3895 case FORMAT_RGBA_ASTC_10x10_KHR: 3896 case FORMAT_RGBA_ASTC_12x10_KHR: 3897 case FORMAT_RGBA_ASTC_12x12_KHR: 3898 // ASTC supports HDR, so a floating point format is required to represent it properly 3899 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported 3900 case FORMAT_ATI1: 3901 return FORMAT_R8; 3902 case FORMAT_R11_EAC: 3903 case FORMAT_SIGNED_R11_EAC: 3904 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient 3905 case FORMAT_ATI2: 3906 return FORMAT_G8R8; 3907 case FORMAT_RG11_EAC: 3908 case FORMAT_SIGNED_RG11_EAC: 3909 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient 3910 case FORMAT_ETC1: 3911 case FORMAT_RGB8_ETC2: 3912 case FORMAT_SRGB8_ETC2: 3913 return FORMAT_X8R8G8B8; 3914 // Bumpmap formats 3915 case FORMAT_V8U8: 
return FORMAT_V8U8; 3916 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8; 3917 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8; 3918 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8; 3919 case FORMAT_V16U16: return FORMAT_V16U16; 3920 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16; 3921 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16; 3922 // Floating-point formats 3923 case FORMAT_A16F: return FORMAT_A32B32G32R32F; 3924 case FORMAT_R16F: return FORMAT_R32F; 3925 case FORMAT_G16R16F: return FORMAT_G32R32F; 3926 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F; 3927 case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F; 3928 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; 3929 case FORMAT_X16B16G16R16F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED; 3930 case FORMAT_A32F: return FORMAT_A32B32G32R32F; 3931 case FORMAT_R32F: return FORMAT_R32F; 3932 case FORMAT_G32R32F: return FORMAT_G32R32F; 3933 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F; 3934 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F; 3935 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F; 3936 case FORMAT_X32B32G32R32F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED; 3937 // Luminance formats 3938 case FORMAT_L8: return FORMAT_L8; 3939 case FORMAT_A4L4: return FORMAT_A8L8; 3940 case FORMAT_L16: return FORMAT_L16; 3941 case FORMAT_A8L8: return FORMAT_A8L8; 3942 case FORMAT_L16F: return FORMAT_X32B32G32R32F; 3943 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F; 3944 case FORMAT_L32F: return FORMAT_X32B32G32R32F; 3945 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F; 3946 // Depth/stencil formats 3947 case FORMAT_D16: 3948 case FORMAT_D32: 3949 case FORMAT_D24X8: 3950 if(hasParent) // Texture 3951 { 3952 return FORMAT_D32F_SHADOW; 3953 } 3954 else if(complementaryDepthBuffer) 3955 { 3956 return FORMAT_D32F_COMPLEMENTARY; 3957 } 3958 else 3959 { 3960 return FORMAT_D32F; 3961 } 3962 case FORMAT_D24S8: 3963 case FORMAT_D24FS8: 3964 if(hasParent) // Texture 3965 { 3966 return FORMAT_D32FS8_SHADOW; 3967 } 3968 else if(complementaryDepthBuffer) 3969 { 3970 return FORMAT_D32FS8_COMPLEMENTARY; 3971 } 3972 else 3973 { 3974 return FORMAT_D32FS8; 3975 } 3976 case FORMAT_D32F: return FORMAT_D32F; 3977 case FORMAT_D32FS8: return FORMAT_D32FS8; 3978 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE; 3979 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE; 3980 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; 3981 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; 3982 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; 3983 case FORMAT_S8: return FORMAT_S8; 3984 // YUV formats 3985 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; 3986 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; 3987 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; 3988 default: 3989 ASSERT(false); 3990 } 3991 3992 return FORMAT_NULL; 3993 } 3994 setTexturePalette(unsigned int * palette)3995 void Surface::setTexturePalette(unsigned int *palette) 3996 { 3997 Surface::palette = palette; 3998 Surface::paletteID++; 3999 } 4000 resolve()4001 void Surface::resolve() 4002 { 4003 if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL) 4004 { 4005 return; 4006 } 4007 4008 ASSERT(internal.depth == 1); // Unimplemented 4009 4010 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE); 4011 4012 int width = internal.width; 4013 int height = internal.height; 4014 int pitch = internal.pitchB; 4015 int slice = internal.sliceB; 4016 4017 unsigned char *source0 = (unsigned char*)source; 4018 unsigned char *source1 = 
source0 + slice; 4019 unsigned char *source2 = source1 + slice; 4020 unsigned char *source3 = source2 + slice; 4021 unsigned char *source4 = source3 + slice; 4022 unsigned char *source5 = source4 + slice; 4023 unsigned char *source6 = source5 + slice; 4024 unsigned char *source7 = source6 + slice; 4025 unsigned char *source8 = source7 + slice; 4026 unsigned char *source9 = source8 + slice; 4027 unsigned char *sourceA = source9 + slice; 4028 unsigned char *sourceB = sourceA + slice; 4029 unsigned char *sourceC = sourceB + slice; 4030 unsigned char *sourceD = sourceC + slice; 4031 unsigned char *sourceE = sourceD + slice; 4032 unsigned char *sourceF = sourceE + slice; 4033 4034 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || 4035 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 || 4036 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8) 4037 { 4038 #if defined(__i386__) || defined(__x86_64__) 4039 if(CPUID::supportsSSE2() && (width % 4) == 0) 4040 { 4041 if(internal.samples == 2) 4042 { 4043 for(int y = 0; y < height; y++) 4044 { 4045 for(int x = 0; x < width; x += 4) 4046 { 4047 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4048 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4049 4050 c0 = _mm_avg_epu8(c0, c1); 4051 4052 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4053 } 4054 4055 source0 += pitch; 4056 source1 += pitch; 4057 } 4058 } 4059 else if(internal.samples == 4) 4060 { 4061 for(int y = 0; y < height; y++) 4062 { 4063 for(int x = 0; x < width; x += 4) 4064 { 4065 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4066 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4067 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4068 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4069 4070 c0 = _mm_avg_epu8(c0, c1); 4071 c2 = _mm_avg_epu8(c2, c3); 4072 c0 = _mm_avg_epu8(c0, c2); 4073 4074 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4075 } 4076 4077 source0 += pitch; 4078 source1 += pitch; 4079 source2 += pitch; 4080 source3 += pitch; 4081 } 4082 } 4083 else if(internal.samples == 8) 4084 { 4085 for(int y = 0; y < height; y++) 4086 { 4087 for(int x = 0; x < width; x += 4) 4088 { 4089 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4090 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4091 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4092 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4093 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4094 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4095 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4096 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4097 4098 c0 = _mm_avg_epu8(c0, c1); 4099 c2 = _mm_avg_epu8(c2, c3); 4100 c4 = _mm_avg_epu8(c4, c5); 4101 c6 = _mm_avg_epu8(c6, c7); 4102 c0 = _mm_avg_epu8(c0, c2); 4103 c4 = _mm_avg_epu8(c4, c6); 4104 c0 = _mm_avg_epu8(c0, c4); 4105 4106 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4107 } 4108 4109 source0 += pitch; 4110 source1 += pitch; 4111 source2 += pitch; 4112 source3 += pitch; 4113 source4 += pitch; 4114 source5 += pitch; 4115 source6 += pitch; 4116 source7 += pitch; 4117 } 4118 } 4119 else if(internal.samples == 16) 4120 { 4121 for(int y = 0; y < height; y++) 4122 { 4123 for(int x = 0; x < width; x += 4) 4124 { 4125 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4126 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4127 __m128i c2 = 
_mm_load_si128((__m128i*)(source2 + 4 * x)); 4128 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4129 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4130 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4131 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4132 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4133 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4134 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4135 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4136 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4137 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4138 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4139 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4140 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4141 4142 c0 = _mm_avg_epu8(c0, c1); 4143 c2 = _mm_avg_epu8(c2, c3); 4144 c4 = _mm_avg_epu8(c4, c5); 4145 c6 = _mm_avg_epu8(c6, c7); 4146 c8 = _mm_avg_epu8(c8, c9); 4147 cA = _mm_avg_epu8(cA, cB); 4148 cC = _mm_avg_epu8(cC, cD); 4149 cE = _mm_avg_epu8(cE, cF); 4150 c0 = _mm_avg_epu8(c0, c2); 4151 c4 = _mm_avg_epu8(c4, c6); 4152 c8 = _mm_avg_epu8(c8, cA); 4153 cC = _mm_avg_epu8(cC, cE); 4154 c0 = _mm_avg_epu8(c0, c4); 4155 c8 = _mm_avg_epu8(c8, cC); 4156 c0 = _mm_avg_epu8(c0, c8); 4157 4158 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4159 } 4160 4161 source0 += pitch; 4162 source1 += pitch; 4163 source2 += pitch; 4164 source3 += pitch; 4165 source4 += pitch; 4166 source5 += pitch; 4167 source6 += pitch; 4168 source7 += pitch; 4169 source8 += pitch; 4170 source9 += pitch; 4171 sourceA += pitch; 4172 sourceB += pitch; 4173 sourceC += pitch; 4174 sourceD += pitch; 4175 sourceE += pitch; 4176 sourceF += pitch; 4177 } 4178 } 4179 else ASSERT(false); 4180 } 4181 else 4182 #endif 4183 { 4184 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 4185 4186 if(internal.samples == 2) 4187 { 4188 for(int y = 0; y < height; y++) 4189 { 4190 for(int x = 0; x < width; x++) 4191 { 4192 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4193 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4194 4195 c0 = AVERAGE(c0, c1); 4196 4197 *(unsigned int*)(source0 + 4 * x) = c0; 4198 } 4199 4200 source0 += pitch; 4201 source1 += pitch; 4202 } 4203 } 4204 else if(internal.samples == 4) 4205 { 4206 for(int y = 0; y < height; y++) 4207 { 4208 for(int x = 0; x < width; x++) 4209 { 4210 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4211 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4212 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4213 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4214 4215 c0 = AVERAGE(c0, c1); 4216 c2 = AVERAGE(c2, c3); 4217 c0 = AVERAGE(c0, c2); 4218 4219 *(unsigned int*)(source0 + 4 * x) = c0; 4220 } 4221 4222 source0 += pitch; 4223 source1 += pitch; 4224 source2 += pitch; 4225 source3 += pitch; 4226 } 4227 } 4228 else if(internal.samples == 8) 4229 { 4230 for(int y = 0; y < height; y++) 4231 { 4232 for(int x = 0; x < width; x++) 4233 { 4234 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4235 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4236 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4237 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4238 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4239 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4240 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4241 unsigned int c7 = 
*(unsigned int*)(source7 + 4 * x); 4242 4243 c0 = AVERAGE(c0, c1); 4244 c2 = AVERAGE(c2, c3); 4245 c4 = AVERAGE(c4, c5); 4246 c6 = AVERAGE(c6, c7); 4247 c0 = AVERAGE(c0, c2); 4248 c4 = AVERAGE(c4, c6); 4249 c0 = AVERAGE(c0, c4); 4250 4251 *(unsigned int*)(source0 + 4 * x) = c0; 4252 } 4253 4254 source0 += pitch; 4255 source1 += pitch; 4256 source2 += pitch; 4257 source3 += pitch; 4258 source4 += pitch; 4259 source5 += pitch; 4260 source6 += pitch; 4261 source7 += pitch; 4262 } 4263 } 4264 else if(internal.samples == 16) 4265 { 4266 for(int y = 0; y < height; y++) 4267 { 4268 for(int x = 0; x < width; x++) 4269 { 4270 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4271 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4272 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4273 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4274 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4275 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4276 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4277 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4278 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4279 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4280 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4281 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4282 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4283 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4284 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4285 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4286 4287 c0 = AVERAGE(c0, c1); 4288 c2 = AVERAGE(c2, c3); 4289 c4 = AVERAGE(c4, c5); 4290 c6 = AVERAGE(c6, c7); 4291 c8 = AVERAGE(c8, c9); 4292 cA = AVERAGE(cA, cB); 4293 cC = AVERAGE(cC, cD); 4294 cE = AVERAGE(cE, cF); 4295 c0 = AVERAGE(c0, c2); 4296 c4 = AVERAGE(c4, c6); 4297 c8 = AVERAGE(c8, cA); 4298 cC = AVERAGE(cC, cE); 4299 c0 = AVERAGE(c0, c4); 4300 c8 = AVERAGE(c8, cC); 4301 c0 = AVERAGE(c0, c8); 4302 4303 *(unsigned int*)(source0 + 4 * x) = c0; 4304 } 4305 4306 source0 += pitch; 4307 source1 += pitch; 4308 source2 += pitch; 4309 source3 += pitch; 4310 source4 += pitch; 4311 source5 += pitch; 4312 source6 += pitch; 4313 source7 += pitch; 4314 source8 += pitch; 4315 source9 += pitch; 4316 sourceA += pitch; 4317 sourceB += pitch; 4318 sourceC += pitch; 4319 sourceD += pitch; 4320 sourceE += pitch; 4321 sourceF += pitch; 4322 } 4323 } 4324 else ASSERT(false); 4325 4326 #undef AVERAGE 4327 } 4328 } 4329 else if(internal.format == FORMAT_G16R16) 4330 { 4331 4332 #if defined(__i386__) || defined(__x86_64__) 4333 if(CPUID::supportsSSE2() && (width % 4) == 0) 4334 { 4335 if(internal.samples == 2) 4336 { 4337 for(int y = 0; y < height; y++) 4338 { 4339 for(int x = 0; x < width; x += 4) 4340 { 4341 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4342 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4343 4344 c0 = _mm_avg_epu16(c0, c1); 4345 4346 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4347 } 4348 4349 source0 += pitch; 4350 source1 += pitch; 4351 } 4352 } 4353 else if(internal.samples == 4) 4354 { 4355 for(int y = 0; y < height; y++) 4356 { 4357 for(int x = 0; x < width; x += 4) 4358 { 4359 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4360 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4361 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4362 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4363 4364 c0 = _mm_avg_epu16(c0, c1); 4365 c2 = _mm_avg_epu16(c2, c3); 4366 c0 = _mm_avg_epu16(c0, c2); 4367 
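							// Note: _mm_avg_epu16 computes (a + b + 1) >> 1 per 16-bit lane, so this
							// cascaded pairwise reduction rounds up at every level and can deviate
							// slightly from the exact multi-sample mean. The scalar AVERAGE macro used
							// in the non-SSE paths applies the same round-up convention (the
							// (x ^ y) & LSB term adds the carry), keeping both paths consistent.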
4368 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4369 } 4370 4371 source0 += pitch; 4372 source1 += pitch; 4373 source2 += pitch; 4374 source3 += pitch; 4375 } 4376 } 4377 else if(internal.samples == 8) 4378 { 4379 for(int y = 0; y < height; y++) 4380 { 4381 for(int x = 0; x < width; x += 4) 4382 { 4383 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4384 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4385 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4386 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4387 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4388 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4389 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4390 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4391 4392 c0 = _mm_avg_epu16(c0, c1); 4393 c2 = _mm_avg_epu16(c2, c3); 4394 c4 = _mm_avg_epu16(c4, c5); 4395 c6 = _mm_avg_epu16(c6, c7); 4396 c0 = _mm_avg_epu16(c0, c2); 4397 c4 = _mm_avg_epu16(c4, c6); 4398 c0 = _mm_avg_epu16(c0, c4); 4399 4400 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4401 } 4402 4403 source0 += pitch; 4404 source1 += pitch; 4405 source2 += pitch; 4406 source3 += pitch; 4407 source4 += pitch; 4408 source5 += pitch; 4409 source6 += pitch; 4410 source7 += pitch; 4411 } 4412 } 4413 else if(internal.samples == 16) 4414 { 4415 for(int y = 0; y < height; y++) 4416 { 4417 for(int x = 0; x < width; x += 4) 4418 { 4419 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4420 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4421 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4422 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4423 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4424 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4425 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4426 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4427 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4428 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4429 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4430 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4431 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4432 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4433 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4434 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4435 4436 c0 = _mm_avg_epu16(c0, c1); 4437 c2 = _mm_avg_epu16(c2, c3); 4438 c4 = _mm_avg_epu16(c4, c5); 4439 c6 = _mm_avg_epu16(c6, c7); 4440 c8 = _mm_avg_epu16(c8, c9); 4441 cA = _mm_avg_epu16(cA, cB); 4442 cC = _mm_avg_epu16(cC, cD); 4443 cE = _mm_avg_epu16(cE, cF); 4444 c0 = _mm_avg_epu16(c0, c2); 4445 c4 = _mm_avg_epu16(c4, c6); 4446 c8 = _mm_avg_epu16(c8, cA); 4447 cC = _mm_avg_epu16(cC, cE); 4448 c0 = _mm_avg_epu16(c0, c4); 4449 c8 = _mm_avg_epu16(c8, cC); 4450 c0 = _mm_avg_epu16(c0, c8); 4451 4452 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4453 } 4454 4455 source0 += pitch; 4456 source1 += pitch; 4457 source2 += pitch; 4458 source3 += pitch; 4459 source4 += pitch; 4460 source5 += pitch; 4461 source6 += pitch; 4462 source7 += pitch; 4463 source8 += pitch; 4464 source9 += pitch; 4465 sourceA += pitch; 4466 sourceB += pitch; 4467 sourceC += pitch; 4468 sourceD += pitch; 4469 sourceE += pitch; 4470 sourceF += pitch; 4471 } 4472 } 4473 else ASSERT(false); 4474 } 4475 else 4476 #endif 4477 { 4478 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + 
(((x) ^ (y)) & 0x00010001)) 4479 4480 if(internal.samples == 2) 4481 { 4482 for(int y = 0; y < height; y++) 4483 { 4484 for(int x = 0; x < width; x++) 4485 { 4486 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4487 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4488 4489 c0 = AVERAGE(c0, c1); 4490 4491 *(unsigned int*)(source0 + 4 * x) = c0; 4492 } 4493 4494 source0 += pitch; 4495 source1 += pitch; 4496 } 4497 } 4498 else if(internal.samples == 4) 4499 { 4500 for(int y = 0; y < height; y++) 4501 { 4502 for(int x = 0; x < width; x++) 4503 { 4504 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4505 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4506 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4507 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4508 4509 c0 = AVERAGE(c0, c1); 4510 c2 = AVERAGE(c2, c3); 4511 c0 = AVERAGE(c0, c2); 4512 4513 *(unsigned int*)(source0 + 4 * x) = c0; 4514 } 4515 4516 source0 += pitch; 4517 source1 += pitch; 4518 source2 += pitch; 4519 source3 += pitch; 4520 } 4521 } 4522 else if(internal.samples == 8) 4523 { 4524 for(int y = 0; y < height; y++) 4525 { 4526 for(int x = 0; x < width; x++) 4527 { 4528 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4529 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4530 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4531 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4532 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4533 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4534 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4535 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4536 4537 c0 = AVERAGE(c0, c1); 4538 c2 = AVERAGE(c2, c3); 4539 c4 = AVERAGE(c4, c5); 4540 c6 = AVERAGE(c6, c7); 4541 c0 = AVERAGE(c0, c2); 4542 c4 = AVERAGE(c4, c6); 4543 c0 = AVERAGE(c0, c4); 4544 4545 *(unsigned int*)(source0 + 4 * x) = c0; 4546 } 4547 4548 source0 += pitch; 4549 source1 += pitch; 4550 source2 += pitch; 4551 source3 += pitch; 4552 source4 += pitch; 4553 source5 += pitch; 4554 source6 += pitch; 4555 source7 += pitch; 4556 } 4557 } 4558 else if(internal.samples == 16) 4559 { 4560 for(int y = 0; y < height; y++) 4561 { 4562 for(int x = 0; x < width; x++) 4563 { 4564 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4565 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4566 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4567 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4568 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4569 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4570 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4571 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4572 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4573 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4574 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4575 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4576 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4577 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4578 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4579 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4580 4581 c0 = AVERAGE(c0, c1); 4582 c2 = AVERAGE(c2, c3); 4583 c4 = AVERAGE(c4, c5); 4584 c6 = AVERAGE(c6, c7); 4585 c8 = AVERAGE(c8, c9); 4586 cA = AVERAGE(cA, cB); 4587 cC = AVERAGE(cC, cD); 4588 cE = AVERAGE(cE, cF); 4589 c0 = AVERAGE(c0, c2); 4590 c4 = AVERAGE(c4, c6); 4591 c8 = AVERAGE(c8, cA); 4592 cC = AVERAGE(cC, cE); 4593 c0 = AVERAGE(c0, c4); 4594 c8 = AVERAGE(c8, cC); 4595 c0 = 
AVERAGE(c0, c8); 4596 4597 *(unsigned int*)(source0 + 4 * x) = c0; 4598 } 4599 4600 source0 += pitch; 4601 source1 += pitch; 4602 source2 += pitch; 4603 source3 += pitch; 4604 source4 += pitch; 4605 source5 += pitch; 4606 source6 += pitch; 4607 source7 += pitch; 4608 source8 += pitch; 4609 source9 += pitch; 4610 sourceA += pitch; 4611 sourceB += pitch; 4612 sourceC += pitch; 4613 sourceD += pitch; 4614 sourceE += pitch; 4615 sourceF += pitch; 4616 } 4617 } 4618 else ASSERT(false); 4619 4620 #undef AVERAGE 4621 } 4622 } 4623 else if(internal.format == FORMAT_A16B16G16R16) 4624 { 4625 #if defined(__i386__) || defined(__x86_64__) 4626 if(CPUID::supportsSSE2() && (width % 2) == 0) 4627 { 4628 if(internal.samples == 2) 4629 { 4630 for(int y = 0; y < height; y++) 4631 { 4632 for(int x = 0; x < width; x += 2) 4633 { 4634 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4635 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4636 4637 c0 = _mm_avg_epu16(c0, c1); 4638 4639 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4640 } 4641 4642 source0 += pitch; 4643 source1 += pitch; 4644 } 4645 } 4646 else if(internal.samples == 4) 4647 { 4648 for(int y = 0; y < height; y++) 4649 { 4650 for(int x = 0; x < width; x += 2) 4651 { 4652 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4653 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4654 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4655 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4656 4657 c0 = _mm_avg_epu16(c0, c1); 4658 c2 = _mm_avg_epu16(c2, c3); 4659 c0 = _mm_avg_epu16(c0, c2); 4660 4661 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4662 } 4663 4664 source0 += pitch; 4665 source1 += pitch; 4666 source2 += pitch; 4667 source3 += pitch; 4668 } 4669 } 4670 else if(internal.samples == 8) 4671 { 4672 for(int y = 0; y < height; y++) 4673 { 4674 for(int x = 0; x < width; x += 2) 4675 { 4676 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4677 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4678 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4679 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4680 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4681 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4682 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4683 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4684 4685 c0 = _mm_avg_epu16(c0, c1); 4686 c2 = _mm_avg_epu16(c2, c3); 4687 c4 = _mm_avg_epu16(c4, c5); 4688 c6 = _mm_avg_epu16(c6, c7); 4689 c0 = _mm_avg_epu16(c0, c2); 4690 c4 = _mm_avg_epu16(c4, c6); 4691 c0 = _mm_avg_epu16(c0, c4); 4692 4693 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4694 } 4695 4696 source0 += pitch; 4697 source1 += pitch; 4698 source2 += pitch; 4699 source3 += pitch; 4700 source4 += pitch; 4701 source5 += pitch; 4702 source6 += pitch; 4703 source7 += pitch; 4704 } 4705 } 4706 else if(internal.samples == 16) 4707 { 4708 for(int y = 0; y < height; y++) 4709 { 4710 for(int x = 0; x < width; x += 2) 4711 { 4712 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4713 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4714 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4715 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4716 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4717 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4718 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4719 __m128i c7 = _mm_load_si128((__m128i*)(source7 
+ 8 * x)); 4720 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4721 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4722 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4723 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4724 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4725 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4726 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4727 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4728 4729 c0 = _mm_avg_epu16(c0, c1); 4730 c2 = _mm_avg_epu16(c2, c3); 4731 c4 = _mm_avg_epu16(c4, c5); 4732 c6 = _mm_avg_epu16(c6, c7); 4733 c8 = _mm_avg_epu16(c8, c9); 4734 cA = _mm_avg_epu16(cA, cB); 4735 cC = _mm_avg_epu16(cC, cD); 4736 cE = _mm_avg_epu16(cE, cF); 4737 c0 = _mm_avg_epu16(c0, c2); 4738 c4 = _mm_avg_epu16(c4, c6); 4739 c8 = _mm_avg_epu16(c8, cA); 4740 cC = _mm_avg_epu16(cC, cE); 4741 c0 = _mm_avg_epu16(c0, c4); 4742 c8 = _mm_avg_epu16(c8, cC); 4743 c0 = _mm_avg_epu16(c0, c8); 4744 4745 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4746 } 4747 4748 source0 += pitch; 4749 source1 += pitch; 4750 source2 += pitch; 4751 source3 += pitch; 4752 source4 += pitch; 4753 source5 += pitch; 4754 source6 += pitch; 4755 source7 += pitch; 4756 source8 += pitch; 4757 source9 += pitch; 4758 sourceA += pitch; 4759 sourceB += pitch; 4760 sourceC += pitch; 4761 sourceD += pitch; 4762 sourceE += pitch; 4763 sourceF += pitch; 4764 } 4765 } 4766 else ASSERT(false); 4767 } 4768 else 4769 #endif 4770 { 4771 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4772 4773 if(internal.samples == 2) 4774 { 4775 for(int y = 0; y < height; y++) 4776 { 4777 for(int x = 0; x < 2 * width; x++) 4778 { 4779 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4780 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4781 4782 c0 = AVERAGE(c0, c1); 4783 4784 *(unsigned int*)(source0 + 4 * x) = c0; 4785 } 4786 4787 source0 += pitch; 4788 source1 += pitch; 4789 } 4790 } 4791 else if(internal.samples == 4) 4792 { 4793 for(int y = 0; y < height; y++) 4794 { 4795 for(int x = 0; x < 2 * width; x++) 4796 { 4797 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4798 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4799 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4800 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4801 4802 c0 = AVERAGE(c0, c1); 4803 c2 = AVERAGE(c2, c3); 4804 c0 = AVERAGE(c0, c2); 4805 4806 *(unsigned int*)(source0 + 4 * x) = c0; 4807 } 4808 4809 source0 += pitch; 4810 source1 += pitch; 4811 source2 += pitch; 4812 source3 += pitch; 4813 } 4814 } 4815 else if(internal.samples == 8) 4816 { 4817 for(int y = 0; y < height; y++) 4818 { 4819 for(int x = 0; x < 2 * width; x++) 4820 { 4821 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4822 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4823 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4824 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4825 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4826 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4827 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4828 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4829 4830 c0 = AVERAGE(c0, c1); 4831 c2 = AVERAGE(c2, c3); 4832 c4 = AVERAGE(c4, c5); 4833 c6 = AVERAGE(c6, c7); 4834 c0 = AVERAGE(c0, c2); 4835 c4 = AVERAGE(c4, c6); 4836 c0 = AVERAGE(c0, c4); 4837 4838 *(unsigned int*)(source0 + 4 * x) = c0; 4839 } 4840 4841 source0 += pitch; 4842 source1 += 
pitch; 4843 source2 += pitch; 4844 source3 += pitch; 4845 source4 += pitch; 4846 source5 += pitch; 4847 source6 += pitch; 4848 source7 += pitch; 4849 } 4850 } 4851 else if(internal.samples == 16) 4852 { 4853 for(int y = 0; y < height; y++) 4854 { 4855 for(int x = 0; x < 2 * width; x++) 4856 { 4857 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4858 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4859 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4860 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4861 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4862 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4863 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4864 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4865 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4866 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4867 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4868 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4869 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4870 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4871 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4872 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4873 4874 c0 = AVERAGE(c0, c1); 4875 c2 = AVERAGE(c2, c3); 4876 c4 = AVERAGE(c4, c5); 4877 c6 = AVERAGE(c6, c7); 4878 c8 = AVERAGE(c8, c9); 4879 cA = AVERAGE(cA, cB); 4880 cC = AVERAGE(cC, cD); 4881 cE = AVERAGE(cE, cF); 4882 c0 = AVERAGE(c0, c2); 4883 c4 = AVERAGE(c4, c6); 4884 c8 = AVERAGE(c8, cA); 4885 cC = AVERAGE(cC, cE); 4886 c0 = AVERAGE(c0, c4); 4887 c8 = AVERAGE(c8, cC); 4888 c0 = AVERAGE(c0, c8); 4889 4890 *(unsigned int*)(source0 + 4 * x) = c0; 4891 } 4892 4893 source0 += pitch; 4894 source1 += pitch; 4895 source2 += pitch; 4896 source3 += pitch; 4897 source4 += pitch; 4898 source5 += pitch; 4899 source6 += pitch; 4900 source7 += pitch; 4901 source8 += pitch; 4902 source9 += pitch; 4903 sourceA += pitch; 4904 sourceB += pitch; 4905 sourceC += pitch; 4906 sourceD += pitch; 4907 sourceE += pitch; 4908 sourceF += pitch; 4909 } 4910 } 4911 else ASSERT(false); 4912 4913 #undef AVERAGE 4914 } 4915 } 4916 else if(internal.format == FORMAT_R32F) 4917 { 4918 #if defined(__i386__) || defined(__x86_64__) 4919 if(CPUID::supportsSSE() && (width % 4) == 0) 4920 { 4921 if(internal.samples == 2) 4922 { 4923 for(int y = 0; y < height; y++) 4924 { 4925 for(int x = 0; x < width; x += 4) 4926 { 4927 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4928 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4929 4930 c0 = _mm_add_ps(c0, c1); 4931 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4932 4933 _mm_store_ps((float*)(source0 + 4 * x), c0); 4934 } 4935 4936 source0 += pitch; 4937 source1 += pitch; 4938 } 4939 } 4940 else if(internal.samples == 4) 4941 { 4942 for(int y = 0; y < height; y++) 4943 { 4944 for(int x = 0; x < width; x += 4) 4945 { 4946 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4947 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4948 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4949 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4950 4951 c0 = _mm_add_ps(c0, c1); 4952 c2 = _mm_add_ps(c2, c3); 4953 c0 = _mm_add_ps(c0, c2); 4954 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4955 4956 _mm_store_ps((float*)(source0 + 4 * x), c0); 4957 } 4958 4959 source0 += pitch; 4960 source1 += pitch; 4961 source2 += pitch; 4962 source3 += pitch; 4963 } 4964 } 4965 else if(internal.samples == 8) 4966 { 4967 for(int y = 0; y < height; y++) 4968 { 4969 for(int x = 0; x < width; 
x += 4) 4970 { 4971 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4972 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4973 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4974 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4975 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4976 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4977 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4978 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4979 4980 c0 = _mm_add_ps(c0, c1); 4981 c2 = _mm_add_ps(c2, c3); 4982 c4 = _mm_add_ps(c4, c5); 4983 c6 = _mm_add_ps(c6, c7); 4984 c0 = _mm_add_ps(c0, c2); 4985 c4 = _mm_add_ps(c4, c6); 4986 c0 = _mm_add_ps(c0, c4); 4987 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4988 4989 _mm_store_ps((float*)(source0 + 4 * x), c0); 4990 } 4991 4992 source0 += pitch; 4993 source1 += pitch; 4994 source2 += pitch; 4995 source3 += pitch; 4996 source4 += pitch; 4997 source5 += pitch; 4998 source6 += pitch; 4999 source7 += pitch; 5000 } 5001 } 5002 else if(internal.samples == 16) 5003 { 5004 for(int y = 0; y < height; y++) 5005 { 5006 for(int x = 0; x < width; x += 4) 5007 { 5008 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 5009 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 5010 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 5011 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 5012 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 5013 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 5014 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 5015 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 5016 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 5017 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 5018 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x)); 5019 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 5020 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 5021 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 5022 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 5023 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 5024 5025 c0 = _mm_add_ps(c0, c1); 5026 c2 = _mm_add_ps(c2, c3); 5027 c4 = _mm_add_ps(c4, c5); 5028 c6 = _mm_add_ps(c6, c7); 5029 c8 = _mm_add_ps(c8, c9); 5030 cA = _mm_add_ps(cA, cB); 5031 cC = _mm_add_ps(cC, cD); 5032 cE = _mm_add_ps(cE, cF); 5033 c0 = _mm_add_ps(c0, c2); 5034 c4 = _mm_add_ps(c4, c6); 5035 c8 = _mm_add_ps(c8, cA); 5036 cC = _mm_add_ps(cC, cE); 5037 c0 = _mm_add_ps(c0, c4); 5038 c8 = _mm_add_ps(c8, cC); 5039 c0 = _mm_add_ps(c0, c8); 5040 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5041 5042 _mm_store_ps((float*)(source0 + 4 * x), c0); 5043 } 5044 5045 source0 += pitch; 5046 source1 += pitch; 5047 source2 += pitch; 5048 source3 += pitch; 5049 source4 += pitch; 5050 source5 += pitch; 5051 source6 += pitch; 5052 source7 += pitch; 5053 source8 += pitch; 5054 source9 += pitch; 5055 sourceA += pitch; 5056 sourceB += pitch; 5057 sourceC += pitch; 5058 sourceD += pitch; 5059 sourceE += pitch; 5060 sourceF += pitch; 5061 } 5062 } 5063 else ASSERT(false); 5064 } 5065 else 5066 #endif 5067 { 5068 if(internal.samples == 2) 5069 { 5070 for(int y = 0; y < height; y++) 5071 { 5072 for(int x = 0; x < width; x++) 5073 { 5074 float c0 = *(float*)(source0 + 4 * x); 5075 float c1 = *(float*)(source1 + 4 * x); 5076 5077 c0 = c0 + c1; 5078 c0 *= 1.0f / 2.0f; 5079 5080 *(float*)(source0 + 4 * x) = c0; 5081 } 5082 5083 source0 += pitch; 5084 source1 += pitch; 5085 } 5086 } 5087 else if(internal.samples == 4) 5088 { 5089 for(int y = 0; y < height; y++) 5090 { 5091 for(int x = 0; x < 
width; x++) 5092 { 5093 float c0 = *(float*)(source0 + 4 * x); 5094 float c1 = *(float*)(source1 + 4 * x); 5095 float c2 = *(float*)(source2 + 4 * x); 5096 float c3 = *(float*)(source3 + 4 * x); 5097 5098 c0 = c0 + c1; 5099 c2 = c2 + c3; 5100 c0 = c0 + c2; 5101 c0 *= 1.0f / 4.0f; 5102 5103 *(float*)(source0 + 4 * x) = c0; 5104 } 5105 5106 source0 += pitch; 5107 source1 += pitch; 5108 source2 += pitch; 5109 source3 += pitch; 5110 } 5111 } 5112 else if(internal.samples == 8) 5113 { 5114 for(int y = 0; y < height; y++) 5115 { 5116 for(int x = 0; x < width; x++) 5117 { 5118 float c0 = *(float*)(source0 + 4 * x); 5119 float c1 = *(float*)(source1 + 4 * x); 5120 float c2 = *(float*)(source2 + 4 * x); 5121 float c3 = *(float*)(source3 + 4 * x); 5122 float c4 = *(float*)(source4 + 4 * x); 5123 float c5 = *(float*)(source5 + 4 * x); 5124 float c6 = *(float*)(source6 + 4 * x); 5125 float c7 = *(float*)(source7 + 4 * x); 5126 5127 c0 = c0 + c1; 5128 c2 = c2 + c3; 5129 c4 = c4 + c5; 5130 c6 = c6 + c7; 5131 c0 = c0 + c2; 5132 c4 = c4 + c6; 5133 c0 = c0 + c4; 5134 c0 *= 1.0f / 8.0f; 5135 5136 *(float*)(source0 + 4 * x) = c0; 5137 } 5138 5139 source0 += pitch; 5140 source1 += pitch; 5141 source2 += pitch; 5142 source3 += pitch; 5143 source4 += pitch; 5144 source5 += pitch; 5145 source6 += pitch; 5146 source7 += pitch; 5147 } 5148 } 5149 else if(internal.samples == 16) 5150 { 5151 for(int y = 0; y < height; y++) 5152 { 5153 for(int x = 0; x < width; x++) 5154 { 5155 float c0 = *(float*)(source0 + 4 * x); 5156 float c1 = *(float*)(source1 + 4 * x); 5157 float c2 = *(float*)(source2 + 4 * x); 5158 float c3 = *(float*)(source3 + 4 * x); 5159 float c4 = *(float*)(source4 + 4 * x); 5160 float c5 = *(float*)(source5 + 4 * x); 5161 float c6 = *(float*)(source6 + 4 * x); 5162 float c7 = *(float*)(source7 + 4 * x); 5163 float c8 = *(float*)(source8 + 4 * x); 5164 float c9 = *(float*)(source9 + 4 * x); 5165 float cA = *(float*)(sourceA + 4 * x); 5166 float cB = *(float*)(sourceB + 4 * x); 5167 float cC = *(float*)(sourceC + 4 * x); 5168 float cD = *(float*)(sourceD + 4 * x); 5169 float cE = *(float*)(sourceE + 4 * x); 5170 float cF = *(float*)(sourceF + 4 * x); 5171 5172 c0 = c0 + c1; 5173 c2 = c2 + c3; 5174 c4 = c4 + c5; 5175 c6 = c6 + c7; 5176 c8 = c8 + c9; 5177 cA = cA + cB; 5178 cC = cC + cD; 5179 cE = cE + cF; 5180 c0 = c0 + c2; 5181 c4 = c4 + c6; 5182 c8 = c8 + cA; 5183 cC = cC + cE; 5184 c0 = c0 + c4; 5185 c8 = c8 + cC; 5186 c0 = c0 + c8; 5187 c0 *= 1.0f / 16.0f; 5188 5189 *(float*)(source0 + 4 * x) = c0; 5190 } 5191 5192 source0 += pitch; 5193 source1 += pitch; 5194 source2 += pitch; 5195 source3 += pitch; 5196 source4 += pitch; 5197 source5 += pitch; 5198 source6 += pitch; 5199 source7 += pitch; 5200 source8 += pitch; 5201 source9 += pitch; 5202 sourceA += pitch; 5203 sourceB += pitch; 5204 sourceC += pitch; 5205 sourceD += pitch; 5206 sourceE += pitch; 5207 sourceF += pitch; 5208 } 5209 } 5210 else ASSERT(false); 5211 } 5212 } 5213 else if(internal.format == FORMAT_G32R32F) 5214 { 5215 #if defined(__i386__) || defined(__x86_64__) 5216 if(CPUID::supportsSSE() && (width % 2) == 0) 5217 { 5218 if(internal.samples == 2) 5219 { 5220 for(int y = 0; y < height; y++) 5221 { 5222 for(int x = 0; x < width; x += 2) 5223 { 5224 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5225 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5226 5227 c0 = _mm_add_ps(c0, c1); 5228 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5229 5230 _mm_store_ps((float*)(source0 + 8 * x), c0); 5231 } 5232 5233 source0 += pitch; 5234 
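						// Each sample plane starts one sliceB apart (see the sourceN setup at the top
						// of resolve()) but shares the same row pitch, so every plane pointer advances
						// by the same byte count per row.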
source1 += pitch; 5235 } 5236 } 5237 else if(internal.samples == 4) 5238 { 5239 for(int y = 0; y < height; y++) 5240 { 5241 for(int x = 0; x < width; x += 2) 5242 { 5243 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5244 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5245 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5246 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5247 5248 c0 = _mm_add_ps(c0, c1); 5249 c2 = _mm_add_ps(c2, c3); 5250 c0 = _mm_add_ps(c0, c2); 5251 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5252 5253 _mm_store_ps((float*)(source0 + 8 * x), c0); 5254 } 5255 5256 source0 += pitch; 5257 source1 += pitch; 5258 source2 += pitch; 5259 source3 += pitch; 5260 } 5261 } 5262 else if(internal.samples == 8) 5263 { 5264 for(int y = 0; y < height; y++) 5265 { 5266 for(int x = 0; x < width; x += 2) 5267 { 5268 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5269 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5270 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5271 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5272 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5273 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5274 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5275 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5276 5277 c0 = _mm_add_ps(c0, c1); 5278 c2 = _mm_add_ps(c2, c3); 5279 c4 = _mm_add_ps(c4, c5); 5280 c6 = _mm_add_ps(c6, c7); 5281 c0 = _mm_add_ps(c0, c2); 5282 c4 = _mm_add_ps(c4, c6); 5283 c0 = _mm_add_ps(c0, c4); 5284 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5285 5286 _mm_store_ps((float*)(source0 + 8 * x), c0); 5287 } 5288 5289 source0 += pitch; 5290 source1 += pitch; 5291 source2 += pitch; 5292 source3 += pitch; 5293 source4 += pitch; 5294 source5 += pitch; 5295 source6 += pitch; 5296 source7 += pitch; 5297 } 5298 } 5299 else if(internal.samples == 16) 5300 { 5301 for(int y = 0; y < height; y++) 5302 { 5303 for(int x = 0; x < width; x += 2) 5304 { 5305 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5306 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5307 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5308 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5309 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5310 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5311 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5312 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5313 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x)); 5314 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 5315 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 5316 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x)); 5317 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 5318 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 5319 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 5320 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 5321 5322 c0 = _mm_add_ps(c0, c1); 5323 c2 = _mm_add_ps(c2, c3); 5324 c4 = _mm_add_ps(c4, c5); 5325 c6 = _mm_add_ps(c6, c7); 5326 c8 = _mm_add_ps(c8, c9); 5327 cA = _mm_add_ps(cA, cB); 5328 cC = _mm_add_ps(cC, cD); 5329 cE = _mm_add_ps(cE, cF); 5330 c0 = _mm_add_ps(c0, c2); 5331 c4 = _mm_add_ps(c4, c6); 5332 c8 = _mm_add_ps(c8, cA); 5333 cC = _mm_add_ps(cC, cE); 5334 c0 = _mm_add_ps(c0, c4); 5335 c8 = _mm_add_ps(c8, cC); 5336 c0 = _mm_add_ps(c0, c8); 5337 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5338 5339 _mm_store_ps((float*)(source0 + 8 * x), c0); 5340 } 5341 5342 source0 += pitch; 5343 source1 += pitch; 5344 source2 += pitch; 5345 source3 += pitch; 5346 
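						// Unlike the integer resolve paths, which use rounding pairwise averages, the
						// floating-point paths accumulate an exact sum and scale by 1/N once at the
						// end, so no per-level rounding bias is introduced.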
source4 += pitch; 5347 source5 += pitch; 5348 source6 += pitch; 5349 source7 += pitch; 5350 source8 += pitch; 5351 source9 += pitch; 5352 sourceA += pitch; 5353 sourceB += pitch; 5354 sourceC += pitch; 5355 sourceD += pitch; 5356 sourceE += pitch; 5357 sourceF += pitch; 5358 } 5359 } 5360 else ASSERT(false); 5361 } 5362 else 5363 #endif 5364 { 5365 if(internal.samples == 2) 5366 { 5367 for(int y = 0; y < height; y++) 5368 { 5369 for(int x = 0; x < 2 * width; x++) 5370 { 5371 float c0 = *(float*)(source0 + 4 * x); 5372 float c1 = *(float*)(source1 + 4 * x); 5373 5374 c0 = c0 + c1; 5375 c0 *= 1.0f / 2.0f; 5376 5377 *(float*)(source0 + 4 * x) = c0; 5378 } 5379 5380 source0 += pitch; 5381 source1 += pitch; 5382 } 5383 } 5384 else if(internal.samples == 4) 5385 { 5386 for(int y = 0; y < height; y++) 5387 { 5388 for(int x = 0; x < 2 * width; x++) 5389 { 5390 float c0 = *(float*)(source0 + 4 * x); 5391 float c1 = *(float*)(source1 + 4 * x); 5392 float c2 = *(float*)(source2 + 4 * x); 5393 float c3 = *(float*)(source3 + 4 * x); 5394 5395 c0 = c0 + c1; 5396 c2 = c2 + c3; 5397 c0 = c0 + c2; 5398 c0 *= 1.0f / 4.0f; 5399 5400 *(float*)(source0 + 4 * x) = c0; 5401 } 5402 5403 source0 += pitch; 5404 source1 += pitch; 5405 source2 += pitch; 5406 source3 += pitch; 5407 } 5408 } 5409 else if(internal.samples == 8) 5410 { 5411 for(int y = 0; y < height; y++) 5412 { 5413 for(int x = 0; x < 2 * width; x++) 5414 { 5415 float c0 = *(float*)(source0 + 4 * x); 5416 float c1 = *(float*)(source1 + 4 * x); 5417 float c2 = *(float*)(source2 + 4 * x); 5418 float c3 = *(float*)(source3 + 4 * x); 5419 float c4 = *(float*)(source4 + 4 * x); 5420 float c5 = *(float*)(source5 + 4 * x); 5421 float c6 = *(float*)(source6 + 4 * x); 5422 float c7 = *(float*)(source7 + 4 * x); 5423 5424 c0 = c0 + c1; 5425 c2 = c2 + c3; 5426 c4 = c4 + c5; 5427 c6 = c6 + c7; 5428 c0 = c0 + c2; 5429 c4 = c4 + c6; 5430 c0 = c0 + c4; 5431 c0 *= 1.0f / 8.0f; 5432 5433 *(float*)(source0 + 4 * x) = c0; 5434 } 5435 5436 source0 += pitch; 5437 source1 += pitch; 5438 source2 += pitch; 5439 source3 += pitch; 5440 source4 += pitch; 5441 source5 += pitch; 5442 source6 += pitch; 5443 source7 += pitch; 5444 } 5445 } 5446 else if(internal.samples == 16) 5447 { 5448 for(int y = 0; y < height; y++) 5449 { 5450 for(int x = 0; x < 2 * width; x++) 5451 { 5452 float c0 = *(float*)(source0 + 4 * x); 5453 float c1 = *(float*)(source1 + 4 * x); 5454 float c2 = *(float*)(source2 + 4 * x); 5455 float c3 = *(float*)(source3 + 4 * x); 5456 float c4 = *(float*)(source4 + 4 * x); 5457 float c5 = *(float*)(source5 + 4 * x); 5458 float c6 = *(float*)(source6 + 4 * x); 5459 float c7 = *(float*)(source7 + 4 * x); 5460 float c8 = *(float*)(source8 + 4 * x); 5461 float c9 = *(float*)(source9 + 4 * x); 5462 float cA = *(float*)(sourceA + 4 * x); 5463 float cB = *(float*)(sourceB + 4 * x); 5464 float cC = *(float*)(sourceC + 4 * x); 5465 float cD = *(float*)(sourceD + 4 * x); 5466 float cE = *(float*)(sourceE + 4 * x); 5467 float cF = *(float*)(sourceF + 4 * x); 5468 5469 c0 = c0 + c1; 5470 c2 = c2 + c3; 5471 c4 = c4 + c5; 5472 c6 = c6 + c7; 5473 c8 = c8 + c9; 5474 cA = cA + cB; 5475 cC = cC + cD; 5476 cE = cE + cF; 5477 c0 = c0 + c2; 5478 c4 = c4 + c6; 5479 c8 = c8 + cA; 5480 cC = cC + cE; 5481 c0 = c0 + c4; 5482 c8 = c8 + cC; 5483 c0 = c0 + c8; 5484 c0 *= 1.0f / 16.0f; 5485 5486 *(float*)(source0 + 4 * x) = c0; 5487 } 5488 5489 source0 += pitch; 5490 source1 += pitch; 5491 source2 += pitch; 5492 source3 += pitch; 5493 source4 += pitch; 5494 source5 += pitch; 5495 source6 += pitch; 
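						// The scalar loops here iterate x over 2 * width 32-bit components rather than
						// width pixels, since each G32R32F pixel holds two floats; the four-channel
						// float formats below use the same component-wise approach with 4 * width.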
5496 source7 += pitch; 5497 source8 += pitch; 5498 source9 += pitch; 5499 sourceA += pitch; 5500 sourceB += pitch; 5501 sourceC += pitch; 5502 sourceD += pitch; 5503 sourceE += pitch; 5504 sourceF += pitch; 5505 } 5506 } 5507 else ASSERT(false); 5508 } 5509 } 5510 else if(internal.format == FORMAT_A32B32G32R32F || 5511 internal.format == FORMAT_X32B32G32R32F || 5512 internal.format == FORMAT_X32B32G32R32F_UNSIGNED) 5513 { 5514 #if defined(__i386__) || defined(__x86_64__) 5515 if(CPUID::supportsSSE()) 5516 { 5517 if(internal.samples == 2) 5518 { 5519 for(int y = 0; y < height; y++) 5520 { 5521 for(int x = 0; x < width; x++) 5522 { 5523 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5524 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5525 5526 c0 = _mm_add_ps(c0, c1); 5527 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5528 5529 _mm_store_ps((float*)(source0 + 16 * x), c0); 5530 } 5531 5532 source0 += pitch; 5533 source1 += pitch; 5534 } 5535 } 5536 else if(internal.samples == 4) 5537 { 5538 for(int y = 0; y < height; y++) 5539 { 5540 for(int x = 0; x < width; x++) 5541 { 5542 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5543 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5544 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5545 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5546 5547 c0 = _mm_add_ps(c0, c1); 5548 c2 = _mm_add_ps(c2, c3); 5549 c0 = _mm_add_ps(c0, c2); 5550 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5551 5552 _mm_store_ps((float*)(source0 + 16 * x), c0); 5553 } 5554 5555 source0 += pitch; 5556 source1 += pitch; 5557 source2 += pitch; 5558 source3 += pitch; 5559 } 5560 } 5561 else if(internal.samples == 8) 5562 { 5563 for(int y = 0; y < height; y++) 5564 { 5565 for(int x = 0; x < width; x++) 5566 { 5567 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5568 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5569 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5570 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5571 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5572 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5573 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5574 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5575 5576 c0 = _mm_add_ps(c0, c1); 5577 c2 = _mm_add_ps(c2, c3); 5578 c4 = _mm_add_ps(c4, c5); 5579 c6 = _mm_add_ps(c6, c7); 5580 c0 = _mm_add_ps(c0, c2); 5581 c4 = _mm_add_ps(c4, c6); 5582 c0 = _mm_add_ps(c0, c4); 5583 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5584 5585 _mm_store_ps((float*)(source0 + 16 * x), c0); 5586 } 5587 5588 source0 += pitch; 5589 source1 += pitch; 5590 source2 += pitch; 5591 source3 += pitch; 5592 source4 += pitch; 5593 source5 += pitch; 5594 source6 += pitch; 5595 source7 += pitch; 5596 } 5597 } 5598 else if(internal.samples == 16) 5599 { 5600 for(int y = 0; y < height; y++) 5601 { 5602 for(int x = 0; x < width; x++) 5603 { 5604 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5605 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5606 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5607 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5608 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5609 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5610 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5611 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5612 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5613 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 5614 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5615 
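							// The aligned _mm_load_ps/_mm_store_ps used here require 16-byte addresses;
							// with 16 bytes per pixel every x step stays aligned as long as each row's
							// base address is 16-byte aligned (assumed to hold for SwiftShader surface
							// allocations, though not asserted in this function).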
		else
		#endif
		{
			if(internal.samples == 2)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < 4 * width; x++)
					{
						float c0 = *(float*)(source0 + 4 * x);
						float c1 = *(float*)(source1 + 4 * x);

						c0 = c0 + c1;
						c0 *= 1.0f / 2.0f;

						*(float*)(source0 + 4 * x) = c0;
					}

					source0 += pitch;
					source1 += pitch;
				}
			}
			else if(internal.samples == 4)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < 4 * width; x++)
					{
						float c0 = *(float*)(source0 + 4 * x);
						float c1 = *(float*)(source1 + 4 * x);
						float c2 = *(float*)(source2 + 4 * x);
						float c3 = *(float*)(source3 + 4 * x);

						c0 = c0 + c1;
						c2 = c2 + c3;
						c0 = c0 + c2;
						c0 *= 1.0f / 4.0f;

						*(float*)(source0 + 4 * x) = c0;
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
				}
			}
			else if(internal.samples == 8)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < 4 * width; x++)
					{
						float c0 = *(float*)(source0 + 4 * x);
						float c1 = *(float*)(source1 + 4 * x);
						float c2 = *(float*)(source2 + 4 * x);
						float c3 = *(float*)(source3 + 4 * x);
						float c4 = *(float*)(source4 + 4 * x);
						float c5 = *(float*)(source5 + 4 * x);
						float c6 = *(float*)(source6 + 4 * x);
						float c7 = *(float*)(source7 + 4 * x);

						c0 = c0 + c1;
						c2 = c2 + c3;
						c4 = c4 + c5;
						c6 = c6 + c7;
						c0 = c0 + c2;
						c4 = c4 + c6;
						c0 = c0 + c4;
						c0 *= 1.0f / 8.0f;

						*(float*)(source0 + 4 * x) = c0;
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
					source4 += pitch;
					source5 += pitch;
					source6 += pitch;
					source7 += pitch;
				}
			}
			else if(internal.samples == 16)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < 4 * width; x++)
					{
						float c0 = *(float*)(source0 + 4 * x);
						float c1 = *(float*)(source1 + 4 * x);
						float c2 = *(float*)(source2 + 4 * x);
						float c3 = *(float*)(source3 + 4 * x);
						float c4 = *(float*)(source4 + 4 * x);
						float c5 = *(float*)(source5 + 4 * x);
						float c6 = *(float*)(source6 + 4 * x);
						float c7 = *(float*)(source7 + 4 * x);
						float c8 = *(float*)(source8 + 4 * x);
						float c9 = *(float*)(source9 + 4 * x);
						float cA = *(float*)(sourceA + 4 * x);
						float cB = *(float*)(sourceB + 4 * x);
						float cC = *(float*)(sourceC + 4 * x);
						float cD = *(float*)(sourceD + 4 * x);
						float cE = *(float*)(sourceE + 4 * x);
						float cF = *(float*)(sourceF + 4 * x);

						c0 = c0 + c1;
						c2 = c2 + c3;
						c4 = c4 + c5;
						c6 = c6 + c7;
						c8 = c8 + c9;
						cA = cA + cB;
						cC = cC + cD;
						cE = cE + cF;
						c0 = c0 + c2;
						c4 = c4 + c6;
						c8 = c8 + cA;
						cC = cC + cE;
						c0 = c0 + c4;
						c8 = c8 + cC;
						c0 = c0 + c8;
						c0 *= 1.0f / 16.0f;

						*(float*)(source0 + 4 * x) = c0;
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
					source4 += pitch;
					source5 += pitch;
					source6 += pitch;
					source7 += pitch;
					source8 += pitch;
					source9 += pitch;
					sourceA += pitch;
					sourceB += pitch;
					sourceC += pitch;
					sourceD += pitch;
					sourceE += pitch;
					sourceF += pitch;
				}
			}
			else ASSERT(false);
		}
	}
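	// R5G6B5 is resolved on the packed 16-bit pixels directly. The SSE2 path splits each
	// pixel into a red/blue group (mask 0xF81F) and a green group (mask 0x07E0): red and
	// blue each fit inside a single byte, so they can be averaged with _mm_avg_epu8, while
	// green straddles the byte boundary and is averaged with _mm_avg_epu16. The scalar
	// fallback uses the AVERAGE() bit trick defined further down.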
	else if(internal.format == FORMAT_R5G6B5)
	{
		#if defined(__i386__) || defined(__x86_64__)
		if(CPUID::supportsSSE2() && (width % 8) == 0)
		{
			if(internal.samples == 2)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < width; x += 8)
					{
						__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
						__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));

						static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
						static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
						__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
						__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
						__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
						__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));

						c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
						c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
						c1 = _mm_avg_epu16(c0__g_, c1__g_);
						c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
						c0 = _mm_or_si128(c0, c1);

						_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
					}

					source0 += pitch;
					source1 += pitch;
				}
			}
			else if(internal.samples == 4)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < width; x += 8)
					{
						__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
						__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
						__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
						__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));

						static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
						static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
						__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
						__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
						__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
						__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
						__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
						__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
						__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
						__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));

						c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
						c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
						c0 = _mm_avg_epu8(c0, c2);
						c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
						c1 = _mm_avg_epu16(c0__g_, c1__g_);
						c3 = _mm_avg_epu16(c2__g_, c3__g_);
						c1 = _mm_avg_epu16(c1, c3);
						c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
						c0 = _mm_or_si128(c0, c1);

						_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
				}
			}
			else if(internal.samples == 8)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < width; x += 8)
					{
						__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
						__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
						__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
						__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
						__m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
						__m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
						__m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
						__m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));

						static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
						static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
						__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
						__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
						__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
						__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
						__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
						__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
						__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
						__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
						__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
						__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
						__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
						__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
						__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
						__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
						__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
						__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));

						c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
						c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
						c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
						c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
						c0 = _mm_avg_epu8(c0, c2);
						c4 = _mm_avg_epu8(c4, c6);
						c0 = _mm_avg_epu8(c0, c4);
						c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
						c1 = _mm_avg_epu16(c0__g_, c1__g_);
						c3 = _mm_avg_epu16(c2__g_, c3__g_);
						c5 = _mm_avg_epu16(c4__g_, c5__g_);
						c7 = _mm_avg_epu16(c6__g_, c7__g_);
						c1 = _mm_avg_epu16(c1, c3);
						c5 = _mm_avg_epu16(c5, c7);
						c1 = _mm_avg_epu16(c1, c5);
						c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
						c0 = _mm_or_si128(c0, c1);

						_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
					source4 += pitch;
					source5 += pitch;
					source6 += pitch;
					source7 += pitch;
				}
			}
			else if(internal.samples == 16)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < width; x += 8)
					{
						__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
						__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
						__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
						__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
						__m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
						__m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
						__m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
						__m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
						__m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
						__m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
						__m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
						__m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
						__m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
						__m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
						__m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
						__m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));

						static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
						static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
						__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
						__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
						__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
						__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
						__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
						__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
						__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
						__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
						__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
						__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
						__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
						__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
						__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
						__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
						__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
						__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
						__m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
						__m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
						__m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
						__m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
						__m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
						__m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
						__m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
						__m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
						__m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
						__m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
						__m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
						__m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
						__m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
						__m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
						__m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
						__m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));

						c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
						c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
						c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
						c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
						c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
						cA = _mm_avg_epu8(cA_r_b, cB_r_b);
						cC = _mm_avg_epu8(cC_r_b, cD_r_b);
						cE = _mm_avg_epu8(cE_r_b, cF_r_b);
						c0 = _mm_avg_epu8(c0, c2);
						c4 = _mm_avg_epu8(c4, c6);
						c8 = _mm_avg_epu8(c8, cA);
						cC = _mm_avg_epu8(cC, cE);
						c0 = _mm_avg_epu8(c0, c4);
						c8 = _mm_avg_epu8(c8, cC);
						c0 = _mm_avg_epu8(c0, c8);
						c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
						c1 = _mm_avg_epu16(c0__g_, c1__g_);
						c3 = _mm_avg_epu16(c2__g_, c3__g_);
						c5 = _mm_avg_epu16(c4__g_, c5__g_);
						c7 = _mm_avg_epu16(c6__g_, c7__g_);
						c9 = _mm_avg_epu16(c8__g_, c9__g_);
						cB = _mm_avg_epu16(cA__g_, cB__g_);
						cD = _mm_avg_epu16(cC__g_, cD__g_);
						cF = _mm_avg_epu16(cE__g_, cF__g_);
						// Green straddles the byte boundary of each 16-bit pixel, so keep
						// the reduction in 16-bit averages, as the 2/4/8-sample paths do.
						c1 = _mm_avg_epu16(c1, c3);
						c5 = _mm_avg_epu16(c5, c7);
						c9 = _mm_avg_epu16(c9, cB);
						cD = _mm_avg_epu16(cD, cF);
						c1 = _mm_avg_epu16(c1, c5);
						c9 = _mm_avg_epu16(c9, cD);
						c1 = _mm_avg_epu16(c1, c9);
						c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
						c0 = _mm_or_si128(c0, c1);

						_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
					source4 += pitch;
					source5 += pitch;
					source6 += pitch;
					source7 += pitch;
					source8 += pitch;
					source9 += pitch;
					sourceA += pitch;
					sourceB += pitch;
					sourceC += pitch;
					sourceD += pitch;
					sourceE += pitch;
					sourceF += pitch;
				}
			}
			else ASSERT(false);
		}
		else
		#endif
		{
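			// AVERAGE(x, y) is a branch-free rounded average of two packed 565 pixels:
			// (x & y) + (((x ^ y) >> 1) & 0x7BEF) averages each channel while masking off
			// the bits that would otherwise shift across channel boundaries, and the
			// ((x ^ y) & 0x0821) term adds the rounding bit per channel, mirroring the
			// (a + b + 1) >> 1 rounding of _mm_avg_epu8/_mm_avg_epu16.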
			#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))

			if(internal.samples == 2)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < width; x++)
					{
						unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
						unsigned short c1 = *(unsigned short*)(source1 + 2 * x);

						c0 = AVERAGE(c0, c1);

						*(unsigned short*)(source0 + 2 * x) = c0;
					}

					source0 += pitch;
					source1 += pitch;
				}
			}
			else if(internal.samples == 4)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < width; x++)
					{
						unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
						unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
						unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
						unsigned short c3 = *(unsigned short*)(source3 + 2 * x);

						c0 = AVERAGE(c0, c1);
						c2 = AVERAGE(c2, c3);
						c0 = AVERAGE(c0, c2);

						*(unsigned short*)(source0 + 2 * x) = c0;
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
				}
			}
			else if(internal.samples == 8)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < width; x++)
					{
						unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
						unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
						unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
						unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
						unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
						unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
						unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
						unsigned short c7 = *(unsigned short*)(source7 + 2 * x);

						c0 = AVERAGE(c0, c1);
						c2 = AVERAGE(c2, c3);
						c4 = AVERAGE(c4, c5);
						c6 = AVERAGE(c6, c7);
						c0 = AVERAGE(c0, c2);
						c4 = AVERAGE(c4, c6);
						c0 = AVERAGE(c0, c4);

						*(unsigned short*)(source0 + 2 * x) = c0;
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
					source4 += pitch;
					source5 += pitch;
					source6 += pitch;
					source7 += pitch;
				}
			}
			else if(internal.samples == 16)
			{
				for(int y = 0; y < height; y++)
				{
					for(int x = 0; x < width; x++)
					{
						unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
						unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
						unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
						unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
						unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
						unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
						unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
						unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
						unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
						unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
						unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
						unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
						unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
						unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
						unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
						unsigned short cF = *(unsigned short*)(sourceF + 2 * x);

						c0 = AVERAGE(c0, c1);
						c2 = AVERAGE(c2, c3);
						c4 = AVERAGE(c4, c5);
						c6 = AVERAGE(c6, c7);
						c8 = AVERAGE(c8, c9);
						cA = AVERAGE(cA, cB);
						cC = AVERAGE(cC, cD);
						cE = AVERAGE(cE, cF);
						c0 = AVERAGE(c0, c2);
						c4 = AVERAGE(c4, c6);
						c8 = AVERAGE(c8, cA);
						cC = AVERAGE(cC, cE);
						c0 = AVERAGE(c0, c4);
						c8 = AVERAGE(c8, cC);
						c0 = AVERAGE(c0, c8);

						*(unsigned short*)(source0 + 2 * x) = c0;
					}

					source0 += pitch;
					source1 += pitch;
					source2 += pitch;
					source3 += pitch;
					source4 += pitch;
					source5 += pitch;
					source6 += pitch;
					source7 += pitch;
					source8 += pitch;
					source9 += pitch;
					sourceA += pitch;
					sourceB += pitch;
					sourceC += pitch;
					sourceD += pitch;
					sourceE += pitch;
					sourceF += pitch;
				}
			}
			else ASSERT(false);

			#undef AVERAGE
		}
	}
	else
	{
		// UNIMPLEMENTED();
	}
}
}