// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Surface.hpp"

#include "Color.hpp"
#include "Context.hpp"
#include "ETC_Decoder.hpp"
#include "Renderer.hpp"
#include "Common/Half.hpp"
#include "Common/Memory.hpp"
#include "Common/CPUID.hpp"
#include "Common/Resource.hpp"
#include "Common/Debug.hpp"
#include "Reactor/Reactor.hpp"

#if defined(__i386__) || defined(__x86_64__)
#include <xmmintrin.h>
#include <emmintrin.h>
#endif

#undef min
#undef max

namespace sw
{
	extern bool quadLayoutEnabled;
	extern bool complementaryDepthBuffer;
	extern TranscendentalPrecision logPrecision;

	unsigned int *Surface::palette = 0;
	unsigned int Surface::paletteID = 0;

	void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));
		ASSERT((z >= 0) && (z < depth));

		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;

		for(int i = 0; i < samples; i++)
		{
			write(element, color);
			element += sliceB;
		}
	}

	void Surface::Buffer::write(int x, int y, const Color<float> &color)
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));

		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;

		for(int i = 0; i < samples; i++)
		{
			write(element, color);
			element += sliceB;
		}
	}

	inline void Surface::Buffer::write(void *element, const Color<float> &color)
	{
		float r = color.r;
		float g = color.g;
		float b = color.b;
		float a = color.a;

		if(isSRGBformat(format))
		{
			r = linearToSRGB(r);
			g = linearToSRGB(g);
			b = linearToSRGB(b);
		}

		switch(format)
		{
		case FORMAT_A8:
			*(unsigned char*)element = unorm<8>(a);
			break;
		case FORMAT_R8_SNORM:
			*(char*)element = snorm<8>(r);
			break;
		case FORMAT_R8:
			*(unsigned char*)element = unorm<8>(r);
			break;
		case FORMAT_R8I:
			*(char*)element = scast<8>(r);
			break;
		case FORMAT_R8UI:
			*(unsigned char*)element = ucast<8>(r);
			break;
		case FORMAT_R16I:
			*(short*)element = scast<16>(r);
			break;
		case FORMAT_R16UI:
			*(unsigned short*)element = ucast<16>(r);
			break;
		case FORMAT_R32I:
			*(int*)element = static_cast<int>(r);
			break;
		case FORMAT_R32UI:
			*(unsigned int*)element = static_cast<unsigned int>(r);
			break;
		case FORMAT_R3G3B2:
			*(unsigned char*)element = (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
			break;
		case FORMAT_A8R3G3B2:
			*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
			break;
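		// Note on the conversion helpers used throughout this switch (defined elsewhere in the
		// Common headers): unorm<n>() and snorm<n>() presumably clamp a float to [0, 1] or
		// [-1, 1] and scale it to an n-bit unsigned or signed integer, while ucast<n>() and
		// scast<n>() appear to clamp an integer-valued float to the n-bit unsigned or signed range.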
		case FORMAT_X4R4G4B4:
			*(unsigned short*)element = 0xF000 | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
			break;
		case FORMAT_A4R4G4B4:
			*(unsigned short*)element = (unorm<4>(a) << 12) | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
			break;
		case FORMAT_R4G4B4A4:
			*(unsigned short*)element = (unorm<4>(r) << 12) | (unorm<4>(g) << 8) | (unorm<4>(b) << 4) | (unorm<4>(a) << 0);
			break;
		case FORMAT_R5G6B5:
			*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<6>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_A1R5G5B5:
			*(unsigned short*)element = (unorm<1>(a) << 15) | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_R5G5B5A1:
			*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<5>(g) << 6) | (unorm<5>(b) << 1) | (unorm<1>(a) << 0);
			break;
		case FORMAT_X1R5G5B5:
			*(unsigned short*)element = 0x8000 | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_A8R8G8B8:
			*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
			break;
		case FORMAT_X8R8G8B8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
			break;
		case FORMAT_A8B8G8R8_SNORM:
			*(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(a)) << 24) |
				(static_cast<unsigned int>(snorm<8>(b)) << 16) |
				(static_cast<unsigned int>(snorm<8>(g)) << 8) |
				(static_cast<unsigned int>(snorm<8>(r)) << 0);
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_SRGB8_A8:
			*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_A8B8G8R8I:
			*(unsigned int*)element = (static_cast<unsigned int>(scast<8>(a)) << 24) |
				(static_cast<unsigned int>(scast<8>(b)) << 16) |
				(static_cast<unsigned int>(scast<8>(g)) << 8) |
				(static_cast<unsigned int>(scast<8>(r)) << 0);
			break;
		case FORMAT_A8B8G8R8UI:
			*(unsigned int*)element = (ucast<8>(a) << 24) | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_X8B8G8R8_SNORM:
			*(unsigned int*)element = 0x7F000000 |
				(static_cast<unsigned int>(snorm<8>(b)) << 16) |
				(static_cast<unsigned int>(snorm<8>(g)) << 8) |
				(static_cast<unsigned int>(snorm<8>(r)) << 0);
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_SRGB8_X8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_X8B8G8R8I:
			*(unsigned int*)element = 0x7F000000 |
				(static_cast<unsigned int>(scast<8>(b)) << 16) |
				(static_cast<unsigned int>(scast<8>(g)) << 8) |
				(static_cast<unsigned int>(scast<8>(r)) << 0);
			break;
		case FORMAT_X8B8G8R8UI:
			*(unsigned int*)element = 0xFF000000 | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_A2R10G10B10:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(r) << 20) | (unorm<10>(g) << 10) | (unorm<10>(b) << 0);
			break;
		case FORMAT_A2B10G10R10:
		case FORMAT_A2B10G10R10UI:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(b) << 20) | (unorm<10>(g) << 10) | (unorm<10>(r) << 0);
			break;
		case FORMAT_G8R8_SNORM:
			*(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(g)) << 8) |
				(static_cast<unsigned short>(snorm<8>(r)) << 0);
			break;
		case FORMAT_G8R8:
			*(unsigned short*)element = (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_G8R8I:
			*(unsigned short*)element = (static_cast<unsigned short>(scast<8>(g)) << 8) |
				(static_cast<unsigned short>(scast<8>(r)) << 0);
			break;
		case FORMAT_G8R8UI:
			*(unsigned short*)element = (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_G16R16:
			*(unsigned int*)element = (unorm<16>(g) << 16) | (unorm<16>(r) << 0);
			break;
		case FORMAT_G16R16I:
			*(unsigned int*)element = (static_cast<unsigned int>(scast<16>(g)) << 16) |
				(static_cast<unsigned int>(scast<16>(r)) << 0);
			break;
		case FORMAT_G16R16UI:
			*(unsigned int*)element = (ucast<16>(g) << 16) | (ucast<16>(r) << 0);
			break;
		case FORMAT_G32R32I:
		case FORMAT_G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			break;
		case FORMAT_A16B16G16R16:
			((unsigned short*)element)[0] = unorm<16>(r);
			((unsigned short*)element)[1] = unorm<16>(g);
			((unsigned short*)element)[2] = unorm<16>(b);
			((unsigned short*)element)[3] = unorm<16>(a);
			break;
		case FORMAT_A16B16G16R16I:
			((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
			((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(a));
			break;
		case FORMAT_A16B16G16R16UI:
			((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
			((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(a));
			break;
		case FORMAT_X16B16G16R16I:
			((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
			break;
		case FORMAT_X16B16G16R16UI:
			((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
			break;
		case FORMAT_A32B32G32R32I:
		case FORMAT_A32B32G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			((unsigned int*)element)[2] = static_cast<unsigned int>(b);
			((unsigned int*)element)[3] = static_cast<unsigned int>(a);
			break;
		case FORMAT_X32B32G32R32I:
		case FORMAT_X32B32G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			((unsigned int*)element)[2] = static_cast<unsigned int>(b);
			break;
		case FORMAT_V8U8:
			*(unsigned short*)element = (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
		case FORMAT_L6V5U5:
			*(unsigned short*)element = (unorm<6>(b) << 10) | (snorm<5>(g) << 5) | (snorm<5>(r) << 0);
			break;
		case FORMAT_Q8W8V8U8:
			*(unsigned int*)element = (snorm<8>(a) << 24) | (snorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
		case FORMAT_X8L8V8U8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
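		// The formats that follow are the legacy D3D "bump map" formats: the U/V (and W/Q)
		// vector components are packed with snorm<n>(), while any luminance or alpha channel
		// sharing the same word is packed with unorm<n>().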
		case FORMAT_V16U16:
			*(unsigned int*)element = (snorm<16>(g) << 16) | (snorm<16>(r) << 0);
			break;
		case FORMAT_A2W10V10U10:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (snorm<10>(b) << 20) | (snorm<10>(g) << 10) | (snorm<10>(r) << 0);
			break;
		case FORMAT_A16W16V16U16:
			((unsigned short*)element)[0] = snorm<16>(r);
			((unsigned short*)element)[1] = snorm<16>(g);
			((unsigned short*)element)[2] = snorm<16>(b);
			((unsigned short*)element)[3] = unorm<16>(a);
			break;
		case FORMAT_Q16W16V16U16:
			((unsigned short*)element)[0] = snorm<16>(r);
			((unsigned short*)element)[1] = snorm<16>(g);
			((unsigned short*)element)[2] = snorm<16>(b);
			((unsigned short*)element)[3] = snorm<16>(a);
			break;
		case FORMAT_R8G8B8:
			((unsigned char*)element)[0] = unorm<8>(b);
			((unsigned char*)element)[1] = unorm<8>(g);
			((unsigned char*)element)[2] = unorm<8>(r);
			break;
		case FORMAT_B8G8R8:
			((unsigned char*)element)[0] = unorm<8>(r);
			((unsigned char*)element)[1] = unorm<8>(g);
			((unsigned char*)element)[2] = unorm<8>(b);
			break;
		case FORMAT_R16F:
			*(half*)element = (half)r;
			break;
		case FORMAT_A16F:
			*(half*)element = (half)a;
			break;
		case FORMAT_G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			break;
		case FORMAT_X16B16G16R16F_UNSIGNED:
			r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
			// Fall through to FORMAT_X16B16G16R16F.
		case FORMAT_X16B16G16R16F:
			((half*)element)[3] = 1.0f;
			// Fall through to FORMAT_B16G16R16F.
		case FORMAT_B16G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			((half*)element)[2] = (half)b;
			break;
		case FORMAT_A16B16G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			((half*)element)[2] = (half)b;
			((half*)element)[3] = (half)a;
			break;
		case FORMAT_A32F:
			*(float*)element = a;
			break;
		case FORMAT_R32F:
			*(float*)element = r;
			break;
		case FORMAT_G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			break;
		case FORMAT_X32B32G32R32F_UNSIGNED:
			r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
			// Fall through to FORMAT_X32B32G32R32F.
		case FORMAT_X32B32G32R32F:
			((float*)element)[3] = 1.0f;
			// Fall through to FORMAT_B32G32R32F.
		case FORMAT_B32G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			((float*)element)[2] = b;
			break;
		case FORMAT_A32B32G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			((float*)element)[2] = b;
			((float*)element)[3] = a;
			break;
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
			*((float*)element) = r;
			break;
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
			*((float*)element) = 1 - r;
			break;
		case FORMAT_S8:
			*((unsigned char*)element) = unorm<8>(r);
			break;
		case FORMAT_L8:
			*(unsigned char*)element = unorm<8>(r);
			break;
		case FORMAT_A4L4:
			*(unsigned char*)element = (unorm<4>(a) << 4) | (unorm<4>(r) << 0);
			break;
		case FORMAT_L16:
			*(unsigned short*)element = unorm<16>(r);
			break;
		case FORMAT_A8L8:
			*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_L16F:
			*(half*)element = (half)r;
			break;
		case FORMAT_A16L16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)a;
			break;
		case FORMAT_L32F:
			*(float*)element = r;
			break;
		case FORMAT_A32L32F:
			((float*)element)[0] = r;
			((float*)element)[1] = a;
			break;
		default:
			ASSERT(false);
		}
	}

	Color<float> Surface::Buffer::read(int x, int y, int z) const
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));
		ASSERT((z >= 0) && (z < depth));

		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;

		return read(element);
	}

	Color<float> Surface::Buffer::read(int x, int y) const
	{
		ASSERT((x >= -border) && (x < (width + border)));
		ASSERT((y >= -border) && (y < (height + border)));

		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;

		return read(element);
	}

	inline Color<float> Surface::Buffer::read(void *element) const
	{
		float r = 0.0f;
		float g = 0.0f;
		float b = 0.0f;
		float a = 1.0f;

		switch(format)
		{
		case FORMAT_P8:
			{
				ASSERT(palette);

				unsigned int abgr = palette[*(unsigned char*)element];

				r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
				g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
			}
			break;
		case FORMAT_A8P8:
			{
				ASSERT(palette);

				unsigned int bgr = palette[((unsigned char*)element)[0]];

				r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
				g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			}
			break;
		case FORMAT_A8:
			r = 0;
			g = 0;
			b = 0;
			a = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_R8_SNORM:
			r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
			break;
		case FORMAT_R8:
			r = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_R8I:
			r = *(signed char*)element;
			break;
		case FORMAT_R8UI:
			r = *(unsigned char*)element;
			break;
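		// The packed unorm cases below normalize each field without shifting it down first:
		// masking leaves the field in place, and multiplying by 1.0f / mask maps its maximum
		// value to 1.0f (e.g. (rgb & 0xE0) * (1.0f / 0xE0) is 1.0f for a fully saturated 3-bit red).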
		case FORMAT_R3G3B2:
			{
				unsigned char rgb = *(unsigned char*)element;

				r = (rgb & 0xE0) * (1.0f / 0xE0);
				g = (rgb & 0x1C) * (1.0f / 0x1C);
				b = (rgb & 0x03) * (1.0f / 0x03);
			}
			break;
		case FORMAT_A8R3G3B2:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0xFF00) * (1.0f / 0xFF00);
				r = (argb & 0x00E0) * (1.0f / 0x00E0);
				g = (argb & 0x001C) * (1.0f / 0x001C);
				b = (argb & 0x0003) * (1.0f / 0x0003);
			}
			break;
		case FORMAT_X4R4G4B4:
			{
				unsigned short rgb = *(unsigned short*)element;

				r = (rgb & 0x0F00) * (1.0f / 0x0F00);
				g = (rgb & 0x00F0) * (1.0f / 0x00F0);
				b = (rgb & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_A4R4G4B4:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0xF000) * (1.0f / 0xF000);
				r = (argb & 0x0F00) * (1.0f / 0x0F00);
				g = (argb & 0x00F0) * (1.0f / 0x00F0);
				b = (argb & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_R4G4B4A4:
			{
				unsigned short rgba = *(unsigned short*)element;

				r = (rgba & 0xF000) * (1.0f / 0xF000);
				g = (rgba & 0x0F00) * (1.0f / 0x0F00);
				b = (rgba & 0x00F0) * (1.0f / 0x00F0);
				a = (rgba & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_R5G6B5:
			{
				unsigned short rgb = *(unsigned short*)element;

				r = (rgb & 0xF800) * (1.0f / 0xF800);
				g = (rgb & 0x07E0) * (1.0f / 0x07E0);
				b = (rgb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_A1R5G5B5:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0x8000) * (1.0f / 0x8000);
				r = (argb & 0x7C00) * (1.0f / 0x7C00);
				g = (argb & 0x03E0) * (1.0f / 0x03E0);
				b = (argb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_R5G5B5A1:
			{
				unsigned short rgba = *(unsigned short*)element;

				r = (rgba & 0xF800) * (1.0f / 0xF800);
				g = (rgba & 0x07C0) * (1.0f / 0x07C0);
				b = (rgba & 0x003E) * (1.0f / 0x003E);
				a = (rgba & 0x0001) * (1.0f / 0x0001);
			}
			break;
		case FORMAT_X1R5G5B5:
			{
				unsigned short xrgb = *(unsigned short*)element;

				r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
				g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
				b = (xrgb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_A8R8G8B8:
			{
				unsigned int argb = *(unsigned int*)element;

				a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
				r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_X8R8G8B8:
			{
				unsigned int xrgb = *(unsigned int*)element;

				r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_A8B8G8R8_SNORM:
			{
				signed char* abgr = (signed char*)element;

				r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
				g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
				b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
				a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
			}
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_SRGB8_A8:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
				b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_A8B8G8R8I:
			{
				signed char* abgr = (signed char*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_A8B8G8R8UI:
			{
				unsigned char* abgr = (unsigned char*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_X8B8G8R8_SNORM:
			{
				signed char* bgr = (signed char*)element;

				r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
				g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
				b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
			}
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_SRGB8_X8:
			{
				unsigned int xbgr = *(unsigned int*)element;

				b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_X8B8G8R8I:
			{
				signed char* bgr = (signed char*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_X8B8G8R8UI:
			{
				unsigned char* bgr = (unsigned char*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_G8R8_SNORM:
			{
				signed char* gr = (signed char*)element;

				// Normalized like the other _SNORM cases: scale the signed byte to [-1, 1].
				r = max(gr[0] * (1.0f / 0x7F), -1.0f);
				g = max(gr[1] * (1.0f / 0x7F), -1.0f);
			}
			break;
		case FORMAT_G8R8:
			{
				unsigned short gr = *(unsigned short*)element;

				g = (gr & 0xFF00) * (1.0f / 0xFF00);
				r = (gr & 0x00FF) * (1.0f / 0x00FF);
			}
			break;
		case FORMAT_G8R8I:
			{
				signed char* gr = (signed char*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_G8R8UI:
			{
				unsigned char* gr = (unsigned char*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_R16I:
			r = *((short*)element);
			break;
		case FORMAT_R16UI:
			r = *((unsigned short*)element);
			break;
		case FORMAT_G16R16I:
			{
				short* gr = (short*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_G16R16:
			{
				unsigned int gr = *(unsigned int*)element;

				g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
				r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
			}
			break;
		case FORMAT_G16R16UI:
			{
				unsigned short* gr = (unsigned short*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_A2R10G10B10:
			{
				unsigned int argb = *(unsigned int*)element;

				a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
				r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
				g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
				b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
			}
			break;
		case FORMAT_A2B10G10R10:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
				b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
				g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
				r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
			}
			break;
		case FORMAT_A2B10G10R10UI:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = static_cast<float>((abgr & 0xC0000000) >> 30);
				b = static_cast<float>((abgr & 0x3FF00000) >> 20);
				g = static_cast<float>((abgr & 0x000FFC00) >> 10);
				r = static_cast<float>(abgr & 0x000003FF);
			}
			break;
		case FORMAT_A16B16G16R16I:
			{
				short* abgr = (short*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_A16B16G16R16:
			r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
			g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
			b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
			a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
			break;
		case FORMAT_A16B16G16R16UI:
			{
				unsigned short* abgr = (unsigned short*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_X16B16G16R16I:
			{
				short* bgr = (short*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_X16B16G16R16UI:
			{
				unsigned short* bgr = (unsigned short*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_A32B32G32R32I:
			{
				int* abgr = (int*)element;

				r = static_cast<float>(abgr[0]);
				g = static_cast<float>(abgr[1]);
				b = static_cast<float>(abgr[2]);
				a = static_cast<float>(abgr[3]);
			}
			break;
		case FORMAT_A32B32G32R32UI:
			{
				unsigned int* abgr = (unsigned int*)element;

				r = static_cast<float>(abgr[0]);
				g = static_cast<float>(abgr[1]);
				b = static_cast<float>(abgr[2]);
				a = static_cast<float>(abgr[3]);
			}
			break;
		case FORMAT_X32B32G32R32I:
			{
				int* bgr = (int*)element;

				r = static_cast<float>(bgr[0]);
				g = static_cast<float>(bgr[1]);
				b = static_cast<float>(bgr[2]);
			}
			break;
		case FORMAT_X32B32G32R32UI:
			{
				unsigned int* bgr = (unsigned int*)element;

				r = static_cast<float>(bgr[0]);
				g = static_cast<float>(bgr[1]);
				b = static_cast<float>(bgr[2]);
			}
			break;
		case FORMAT_G32R32I:
			{
				int* gr = (int*)element;

				r = static_cast<float>(gr[0]);
				g = static_cast<float>(gr[1]);
			}
			break;
		case FORMAT_G32R32UI:
			{
				unsigned int* gr = (unsigned int*)element;

				r = static_cast<float>(gr[0]);
				g = static_cast<float>(gr[1]);
			}
			break;
		case FORMAT_R32I:
			r = static_cast<float>(*((int*)element));
			break;
		case FORMAT_R32UI:
			r = static_cast<float>(*((unsigned int*)element));
			break;
		case FORMAT_V8U8:
			{
				unsigned short vu = *(unsigned short*)element;

				r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
			}
			break;
		case FORMAT_L6V5U5:
			{
				unsigned short lvu = *(unsigned short*)element;

				r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
				g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
				b = (lvu & 0xFC00) * (1.0f / 0xFC00);
			}
			break;
		case FORMAT_Q8W8V8U8:
			{
				unsigned int qwvu = *(unsigned int*)element;

				r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
				b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
				a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
			}
			break;
		case FORMAT_X8L8V8U8:
			{
				unsigned int xlvu = *(unsigned int*)element;

				r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
				b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
			}
			break;
		case FORMAT_R8G8B8:
			r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
			g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			break;
		case FORMAT_B8G8R8:
			r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
			break;
		case FORMAT_V16U16:
			{
				unsigned int vu = *(unsigned int*)element;

				r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
				g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
			}
			break;
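		// The signed (snorm) bump-map reads here work by shifting each field into the most
		// significant bits of an int so that it sign-extends, then scaling by the reciprocal of
		// the field's maximum in that position (e.g. 1.0f / 0x7F000000 for an 8-bit value at bit 24).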
		case FORMAT_A2W10V10U10:
			{
				unsigned int awvu = *(unsigned int*)element;

				r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
				g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
				b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
				a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
			}
			break;
		case FORMAT_A16W16V16U16:
			r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
			g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
			b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
			a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
			break;
		case FORMAT_Q16W16V16U16:
			r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
			g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
			b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
			a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
			break;
		case FORMAT_L8:
			r =
			g =
			b = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_A4L4:
			{
				unsigned char al = *(unsigned char*)element;

				r =
				g =
				b = (al & 0x0F) * (1.0f / 0x0F);
				a = (al & 0xF0) * (1.0f / 0xF0);
			}
			break;
		case FORMAT_L16:
			r =
			g =
			b = *(unsigned short*)element * (1.0f / 0xFFFF);
			break;
		case FORMAT_A8L8:
			r =
			g =
			b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			break;
		case FORMAT_L16F:
			r =
			g =
			b = *(half*)element;
			break;
		case FORMAT_A16L16F:
			r =
			g =
			b = ((half*)element)[0];
			a = ((half*)element)[1];
			break;
		case FORMAT_L32F:
			r =
			g =
			b = *(float*)element;
			break;
		case FORMAT_A32L32F:
			r =
			g =
			b = ((float*)element)[0];
			a = ((float*)element)[1];
			break;
		case FORMAT_A16F:
			a = *(half*)element;
			break;
		case FORMAT_R16F:
			r = *(half*)element;
			break;
		case FORMAT_G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			break;
		case FORMAT_X16B16G16R16F:
		case FORMAT_X16B16G16R16F_UNSIGNED:
		case FORMAT_B16G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			b = ((half*)element)[2];
			break;
		case FORMAT_A16B16G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			b = ((half*)element)[2];
			a = ((half*)element)[3];
			break;
		case FORMAT_A32F:
			a = *(float*)element;
			break;
		case FORMAT_R32F:
			r = *(float*)element;
			break;
		case FORMAT_G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			break;
		case FORMAT_X32B32G32R32F:
		case FORMAT_X32B32G32R32F_UNSIGNED:
		case FORMAT_B32G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			b = ((float*)element)[2];
			break;
		case FORMAT_A32B32G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			b = ((float*)element)[2];
			a = ((float*)element)[3];
			break;
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
			r = *(float*)element;
			g = r;
			b = r;
			a = r;
			break;
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
			r = 1.0f - *(float*)element;
			g = r;
			b = r;
			a = r;
			break;
		case FORMAT_S8:
			r = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		default:
			ASSERT(false);
		}
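
		// For sRGB formats the stored values are gamma encoded. sRGBtoLinear() presumably applies
		// the standard IEC 61966-2-1 decoding, roughly c / 12.92 for c <= 0.04045 and
		// ((c + 0.055) / 1.055)^2.4 otherwise, mirroring linearToSRGB() in the write path above.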

		if(isSRGBformat(format))
		{
			r = sRGBtoLinear(r);
			g = sRGBtoLinear(g);
			b = sRGBtoLinear(b);
		}

		return Color<float>(r, g, b, a);
	}

	Color<float> Surface::Buffer::sample(float x, float y, float z) const
	{
		x -= 0.5f;
		y -= 0.5f;
		z -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		int z0 = clamp((int)z, 0, depth - 1);
		int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;

		Color<float> c000 = read(x0, y0, z0);
		Color<float> c100 = read(x1, y0, z0);
		Color<float> c010 = read(x0, y1, z0);
		Color<float> c110 = read(x1, y1, z0);
		Color<float> c001 = read(x0, y0, z1);
		Color<float> c101 = read(x1, y0, z1);
		Color<float> c011 = read(x0, y1, z1);
		Color<float> c111 = read(x1, y1, z1);

		float fx = x - x0;
		float fy = y - y0;
		float fz = z - z0;

		c000 *= (1 - fx) * (1 - fy) * (1 - fz);
		c100 *= fx * (1 - fy) * (1 - fz);
		c010 *= (1 - fx) * fy * (1 - fz);
		c110 *= fx * fy * (1 - fz);
		c001 *= (1 - fx) * (1 - fy) * fz;
		c101 *= fx * (1 - fy) * fz;
		c011 *= (1 - fx) * fy * fz;
		c111 *= fx * fy * fz;

		return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
	}

	Color<float> Surface::Buffer::sample(float x, float y, int layer) const
	{
		x -= 0.5f;
		y -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		Color<float> c00 = read(x0, y0, layer);
		Color<float> c10 = read(x1, y0, layer);
		Color<float> c01 = read(x0, y1, layer);
		Color<float> c11 = read(x1, y1, layer);

		float fx = x - x0;
		float fy = y - y0;

		c00 *= (1 - fx) * (1 - fy);
		c10 *= fx * (1 - fy);
		c01 *= (1 - fx) * fy;
		c11 *= fx * fy;

		return c00 + c10 + c01 + c11;
	}

	void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
	{
		this->lock = lock;

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
		case LOCK_UPDATE:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirty = true;
			break;
		default:
			ASSERT(false);
		}

		if(buffer)
		{
			x += border;
			y += border;

			switch(format)
			{
			case FORMAT_DXT1:
			case FORMAT_ATI1:
			case FORMAT_ETC1:
			case FORMAT_R11_EAC:
			case FORMAT_SIGNED_R11_EAC:
			case FORMAT_RGB8_ETC2:
			case FORMAT_SRGB8_ETC2:
			case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
				return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RG11_EAC:
			case FORMAT_SIGNED_RG11_EAC:
			case FORMAT_RGBA8_ETC2_EAC:
			case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
			case FORMAT_RGBA_ASTC_4x4_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_5x4_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
				return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_5x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_6x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_6x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x8_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x8_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x10_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_12x10_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
				return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_12x12_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
				return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
			case FORMAT_DXT3:
			case FORMAT_DXT5:
			case FORMAT_ATI2:
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			default:
				return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
			}
		}

		return nullptr;
	}

	void Surface::Buffer::unlockRect()
	{
		lock = LOCK_UNLOCKED;
	}

	class SurfaceImplementation : public Surface
	{
	public:
		SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
			: Surface(width, height, depth, format, pixels, pitch, slice) {}

		SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
			: Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}

		~SurfaceImplementation() override {}

		void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
		{
			return Surface::lockInternal(x, y, z, lock, client);
		}

		void unlockInternal() override
		{
			Surface::unlockInternal();
		}
	};
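
	// Rough sketch of the external lock/unlock flow for a client-allocated surface (illustrative
	// only; not taken from actual client code):
	//
	//   Surface *surface = Surface::create(width, height, 1, FORMAT_A8R8G8B8, pixels, pitch, slice);
	//   void *data = surface->lockExternal(0, 0, 0, LOCK_READWRITE, PUBLIC);
	//   /* read or write the client-visible pixels through 'data' */
	//   surface->unlockExternal();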

	Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
	{
		return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
	}

	Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
	{
		return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
	}

	Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
	{
		resource = new Resource(0);
		hasParent = false;
		ownExternal = false;
		depth = max(1, depth);

		external.buffer = pixels;
		external.width = width;
		external.height = height;
		external.depth = depth;
		external.samples = 1;
		external.format = format;
		external.bytes = bytes(external.format);
		external.pitchB = pitch;
		external.pitchP = external.bytes ? pitch / external.bytes : 0;
		external.sliceB = slice;
		external.sliceP = external.bytes ? slice / external.bytes : 0;
		external.border = 0;
		external.lock = LOCK_UNLOCKED;
		external.dirty = true;

		internal.buffer = nullptr;
		internal.width = width;
		internal.height = height;
		internal.depth = depth;
		internal.samples = 1;
		internal.format = selectInternalFormat(format);
		internal.bytes = bytes(internal.format);
		internal.pitchB = pitchB(internal.width, 0, internal.format, false);
		internal.pitchP = pitchP(internal.width, 0, internal.format, false);
		internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
		internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
		internal.border = 0;
		internal.lock = LOCK_UNLOCKED;
		internal.dirty = false;

		stencil.buffer = nullptr;
		stencil.width = width;
		stencil.height = height;
		stencil.depth = depth;
		stencil.samples = 1;
		stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
		stencil.bytes = bytes(stencil.format);
		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
		stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
		stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
		stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
		stencil.border = 0;
		stencil.lock = LOCK_UNLOCKED;
		stencil.dirty = false;

		dirtyContents = true;
		paletteUsed = 0;
	}

	Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
	{
		resource = texture ? texture : new Resource(0);
		hasParent = texture != nullptr;
		ownExternal = true;
		depth = max(1, depth);
		samples = max(1, samples);

		external.buffer = nullptr;
		external.width = width;
		external.height = height;
		external.depth = depth;
		external.samples = (short)samples;
		external.format = format;
		external.bytes = bytes(external.format);
		external.pitchB = !pitchPprovided ? pitchB(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided * external.bytes;
		external.pitchP = !pitchPprovided ? pitchP(external.width, 0, external.format, renderTarget && !texture) : pitchPprovided;
		external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
		external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
		external.border = 0;
		external.lock = LOCK_UNLOCKED;
		external.dirty = false;

		internal.buffer = nullptr;
		internal.width = width;
		internal.height = height;
		internal.depth = depth;
		internal.samples = (short)samples;
		internal.format = selectInternalFormat(format);
		internal.bytes = bytes(internal.format);
		internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
		internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
		internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
		internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
		internal.border = (short)border;
		internal.lock = LOCK_UNLOCKED;
		internal.dirty = false;

		stencil.buffer = nullptr;
		stencil.width = width;
		stencil.height = height;
		stencil.depth = depth;
		stencil.samples = (short)samples;
		stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
		stencil.bytes = bytes(stencil.format);
		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
		stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
		stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
		stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
		stencil.border = 0;
		stencil.lock = LOCK_UNLOCKED;
		stencil.dirty = false;

		dirtyContents = true;
		paletteUsed = 0;
	}

	Surface::~Surface()
	{
		// sync() must be called before this destructor to ensure all locks have been released.
		// We can't call it here because the parent resource may already have been destroyed.
		ASSERT(isUnlocked());

		if(!hasParent)
		{
			resource->destruct();
		}

		if(ownExternal)
		{
			deallocate(external.buffer);
		}

		if(internal.buffer != external.buffer)
		{
			deallocate(internal.buffer);
		}

		deallocate(stencil.buffer);

		external.buffer = nullptr;
		internal.buffer = nullptr;
		stencil.buffer = nullptr;
	}

	void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
	{
		resource->lock(client);

		if(!external.buffer)
		{
			if(internal.buffer && identicalBuffers())
			{
				external.buffer = internal.buffer;
			}
			else
			{
				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
			}
		}

		if(internal.dirty)
		{
			if(lock != LOCK_DISCARD)
			{
				update(external, internal);
			}

			internal.dirty = false;
		}

		switch(lock)
		{
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyContents = true;
			break;
		default:
			ASSERT(false);
		}

		return external.lockRect(x, y, z, lock);
	}

	void Surface::unlockExternal()
	{
		external.unlockRect();

		resource->unlock();
	}

	void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
	{
		if(lock != LOCK_UNLOCKED)
		{
			resource->lock(client);
		}

		if(!internal.buffer)
		{
			if(external.buffer && identicalBuffers())
			{
				internal.buffer = external.buffer;
			}
			else
			{
				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
			}
		}

		// FIXME: WHQL requires conversion to lower external precision and back
		if(logPrecision >= WHQL)
		{
			if(internal.dirty && renderTarget && internal.format != external.format)
			{
				if(lock != LOCK_DISCARD)
				{
					switch(external.format)
					{
					case FORMAT_R3G3B2:
					case FORMAT_A8R3G3B2:
					case FORMAT_A1R5G5B5:
					case FORMAT_A2R10G10B10:
					case FORMAT_A2B10G10R10:
						lockExternal(0, 0, 0, LOCK_READWRITE, client);
						unlockExternal();
						break;
					default:
						// Difference passes WHQL
						break;
					}
				}
			}
		}

		if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
		{
			if(lock != LOCK_DISCARD)
			{
				update(internal, external);
			}

			external.dirty = false;
			paletteUsed = Surface::paletteID;
		}

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyContents = true;
			break;
		default:
			ASSERT(false);
		}

		if(lock == LOCK_READONLY && client == PUBLIC)
		{
			resolve();
		}

		return internal.lockRect(x, y, z, lock);
	}

	void Surface::unlockInternal()
	{
		internal.unlockRect();

		resource->unlock();
	}

	void *Surface::lockStencil(int x, int y, int front, Accessor client)
	{
		resource->lock(client);

		if(stencil.format == FORMAT_NULL)
		{
			return nullptr;
		}

		if(!stencil.buffer)
		{
			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
		}

		return stencil.lockRect(x, y, front, LOCK_READWRITE);   // FIXME
	}

	void Surface::unlockStencil()
	{
		stencil.unlockRect();

		resource->unlock();
	}

	int Surface::bytes(Format format)
	{
		switch(format)
		{
		case FORMAT_NULL: return 0;
		case FORMAT_P8: return 1;
		case FORMAT_A8P8: return 2;
		case FORMAT_A8: return 1;
		case FORMAT_R8I: return 1;
		case FORMAT_R8: return 1;
		case FORMAT_R3G3B2: return 1;
		case FORMAT_R16I: return 2;
		case FORMAT_R16UI: return 2;
		case FORMAT_A8R3G3B2: return 2;
		case FORMAT_R5G6B5: return 2;
		case FORMAT_A1R5G5B5: return 2;
		case FORMAT_X1R5G5B5: return 2;
		case FORMAT_R5G5B5A1: return 2;
		case FORMAT_X4R4G4B4: return 2;
		case FORMAT_A4R4G4B4: return 2;
		case FORMAT_R4G4B4A4: return 2;
		case FORMAT_R8G8B8: return 3;
		case FORMAT_B8G8R8: return 3;
		case FORMAT_R32I: return 4;
		case FORMAT_R32UI: return 4;
		case FORMAT_X8R8G8B8: return 4;
	//	case FORMAT_X8G8R8B8Q: return 4;
		case FORMAT_A8R8G8B8: return 4;
	//	case FORMAT_A8G8R8B8Q: return 4;
		case FORMAT_X8B8G8R8I: return 4;
		case FORMAT_X8B8G8R8: return 4;
		case FORMAT_SRGB8_X8: return 4;
		case FORMAT_SRGB8_A8: return 4;
		case FORMAT_A8B8G8R8I: return 4;
		case FORMAT_R8UI: return 1;
		case FORMAT_G8R8UI: return 2;
		case FORMAT_X8B8G8R8UI: return 4;
		case FORMAT_A8B8G8R8UI: return 4;
		case FORMAT_A8B8G8R8: return 4;
		case FORMAT_R8_SNORM: return 1;
		case FORMAT_G8R8_SNORM: return 2;
		case FORMAT_X8B8G8R8_SNORM: return 4;
		case FORMAT_A8B8G8R8_SNORM: return 4;
		case FORMAT_A2R10G10B10: return 4;
		case FORMAT_A2B10G10R10: return 4;
		case FORMAT_A2B10G10R10UI: return 4;
		case FORMAT_G8R8I: return 2;
		case FORMAT_G8R8: return 2;
		case FORMAT_G16R16I: return 4;
		case FORMAT_G16R16UI: return 4;
		case FORMAT_G16R16: return 4;
		case FORMAT_G32R32I: return 8;
		case FORMAT_G32R32UI: return 8;
		case FORMAT_X16B16G16R16I: return 8;
		case FORMAT_X16B16G16R16UI: return 8;
		case FORMAT_A16B16G16R16I: return 8;
		case FORMAT_A16B16G16R16UI: return 8;
		case FORMAT_A16B16G16R16: return 8;
		case FORMAT_X32B32G32R32I: return 16;
		case FORMAT_X32B32G32R32UI: return 16;
		case FORMAT_A32B32G32R32I: return 16;
		case FORMAT_A32B32G32R32UI: return 16;
		// Compressed formats
		case FORMAT_DXT1: return 2;   // Column of four pixels
		case FORMAT_DXT3: return 4;   // Column of four pixels
		case FORMAT_DXT5: return 4;   // Column of four pixels
		case FORMAT_ATI1: return 2;   // Column of four pixels
		case FORMAT_ATI2: return 4;   // Column of four pixels
		case FORMAT_ETC1: return 2;   // Column of four pixels
		case FORMAT_R11_EAC: return 2;
		case FORMAT_SIGNED_R11_EAC: return 2;
		case FORMAT_RG11_EAC: return 4;
		case FORMAT_SIGNED_RG11_EAC: return 4;
		case FORMAT_RGB8_ETC2: return 2;
		case FORMAT_SRGB8_ETC2: return 2;
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_RGBA8_ETC2_EAC: return 4;
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0;   // FIXME
		// Bumpmap formats
		case FORMAT_V8U8: return 2;
		case FORMAT_L6V5U5: return 2;
		case FORMAT_Q8W8V8U8: return 4;
		case FORMAT_X8L8V8U8: return 4;
		case FORMAT_A2W10V10U10: return 4;
		case FORMAT_V16U16: return 4;
		case FORMAT_A16W16V16U16: return 8;
		case FORMAT_Q16W16V16U16: return 8;
		// Luminance formats
		case FORMAT_L8: return 1;
		case FORMAT_A4L4: return 1;
		case FORMAT_L16: return 2;
		case FORMAT_A8L8: return 2;
		case FORMAT_L16F: return 2;
		case FORMAT_A16L16F: return 4;
		case FORMAT_L32F: return 4;
		case FORMAT_A32L32F: return 8;
		// Floating-point formats
		case FORMAT_A16F: return 2;
		case FORMAT_R16F: return 2;
		case FORMAT_G16R16F: return 4;
		case FORMAT_B16G16R16F: return 6;
		case FORMAT_X16B16G16R16F: return 8;
		case FORMAT_A16B16G16R16F: return 8;
		case FORMAT_X16B16G16R16F_UNSIGNED: return 8;
		case FORMAT_A32F: return 4;
		case FORMAT_R32F: return 4;
		case FORMAT_G32R32F: return 8;
		case FORMAT_B32G32R32F: return 12;
		case FORMAT_X32B32G32R32F: return 16;
		case FORMAT_A32B32G32R32F: return 16;
		case FORMAT_X32B32G32R32F_UNSIGNED: return 16;
		// Depth/stencil formats
		case FORMAT_D16: return 2;
		case FORMAT_D32: return 4;
		case FORMAT_D24X8: return 4;
		case FORMAT_D24S8: return 4;
		case FORMAT_D24FS8: return 4;
		case FORMAT_D32F: return 4;
		case FORMAT_D32FS8: return 4;
		case FORMAT_D32F_COMPLEMENTARY: return 4;
		case FORMAT_D32FS8_COMPLEMENTARY: return 4;
		case FORMAT_D32F_LOCKABLE: return 4;
		case FORMAT_D32FS8_TEXTURE: return 4;
		case FORMAT_D32F_SHADOW: return 4;
		case FORMAT_D32FS8_SHADOW: return 4;
		case FORMAT_DF24S8: return 4;
		case FORMAT_DF16S8: return 2;
		case FORMAT_INTZ: return 4;
		case FORMAT_S8: return 1;
		case FORMAT_YV12_BT601: return 1;   // Y plane only
		case FORMAT_YV12_BT709: return 1;   // Y plane only
		case FORMAT_YV12_JFIF: return 1;    // Y plane only
		default:
			ASSERT(false);
		}

		return 0;
	}

	int Surface::pitchB(int width, int border, Format format, bool target)
	{
		width += 2 * border;

		// Render targets require 2x2 quads
		if(target || isDepth(format) || isStencil(format))
		{
			width = align<2>(width);
		}
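
		// For the block-compressed formats below, the pitch is the byte size of one row of
		// blocks rather than one row of pixels. For example, a 256-texel-wide DXT1 image has
		// (256 + 3) / 4 = 64 blocks per row at 8 bytes each, giving a pitch of 512 bytes.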

		switch(format)
		{
		case FORMAT_DXT1:
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			return 8 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per 4 rows
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
			return 16 * ((width + 4) / 5);
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
			return 16 * ((width + 5) / 6);
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
			return 16 * ((width + 7) / 8);
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
			return 16 * ((width + 9) / 10);
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
			return 16 * ((width + 11) / 12);
		case FORMAT_DXT3:
		case FORMAT_DXT5:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_ATI1:
			return 2 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per row
		case FORMAT_ATI2:
			return 4 * ((width + 3) / 4);    // 128 bit per 4x4 block, computed per row
		case FORMAT_YV12_BT601:
		case FORMAT_YV12_BT709:
		case FORMAT_YV12_JFIF:
			return align<16>(width);
		default:
			return bytes(format) * width;
		}
	}

	int Surface::pitchP(int width, int border, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ? pitchB(width, border, format, target) / B : 0;
	}
		       pitchB(width, border, format, target) / B : 0;
	}

	int Surface::sliceB(int width, int height, int border, Format format, bool target)
	{
		height += 2 * border;

		// Render targets require 2x2 quads
		if(target || isDepth(format) || isStencil(format))
		{
			height = align<2>(height);
		}

		switch(format)
		{
		case FORMAT_DXT1:
		case FORMAT_DXT3:
		case FORMAT_DXT5:
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
			return pitchB(width, border, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
			return pitchB(width, border, format, target) * ((height + 4) / 5);   // Pitch computed per 5 rows
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
			return pitchB(width, border, format, target) * ((height + 5) / 6);   // Pitch computed per 6 rows
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
			return pitchB(width, border, format, target) * ((height + 7) / 8);   // Pitch computed per 8 rows
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
			return pitchB(width, border, format, target) * ((height + 9) / 10);  // Pitch computed per 10 rows
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
			return pitchB(width, border, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
		case FORMAT_ATI1:
		case FORMAT_ATI2:
			return pitchB(width, border, format, target) * align<4>(height);     // Pitch computed per row
		default:
			return pitchB(width, border, format, target) * height;               // Pitch computed per row
		}
	}

	int Surface::sliceP(int width, int height, int border, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ?
sliceB(width, height, border, format, target) / B : 0; 1877 } 1878 update(Buffer & destination,Buffer & source)1879 void Surface::update(Buffer &destination, Buffer &source) 1880 { 1881 // ASSERT(source.lock != LOCK_UNLOCKED); 1882 // ASSERT(destination.lock != LOCK_UNLOCKED); 1883 1884 if(destination.buffer != source.buffer) 1885 { 1886 ASSERT(source.dirty && !destination.dirty); 1887 1888 switch(source.format) 1889 { 1890 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format 1891 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format 1892 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format 1893 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format 1894 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format 1895 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format 1896 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format 1897 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format 1898 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format 1899 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format 1900 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format 1901 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format 1902 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format 1903 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format 1904 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format 1905 case FORMAT_ETC1: 1906 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format 1907 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format 1908 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format 1909 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format 1910 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format 1911 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format 1912 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format 1913 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format 1914 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format 1915 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format 1916 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format 1917 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format 1918 case FORMAT_RGBA_ASTC_8x6_KHR: 
decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format 1919 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format 1920 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format 1921 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format 1922 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format 1923 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format 1924 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format 1925 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format 1926 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format 1927 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format 1928 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format 1929 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format 1930 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format 1931 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format 1932 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format 1933 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format 1934 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format 1935 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format 1936 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format 1937 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format 1938 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format 1939 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format 1940 default: genericUpdate(destination, source); break; 1941 } 1942 } 1943 } 1944 genericUpdate(Buffer & destination,Buffer & source)1945 void Surface::genericUpdate(Buffer &destination, Buffer &source) 1946 { 1947 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1948 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1949 1950 int depth = min(destination.depth, source.depth); 1951 int height = min(destination.height, source.height); 1952 int width = min(destination.width, source.width); 1953 int rowBytes = width * source.bytes; 1954 1955 for(int z = 0; z < depth; 
z++) 1956 { 1957 unsigned char *sourceRow = sourceSlice; 1958 unsigned char *destinationRow = destinationSlice; 1959 1960 for(int y = 0; y < height; y++) 1961 { 1962 if(source.format == destination.format) 1963 { 1964 memcpy(destinationRow, sourceRow, rowBytes); 1965 } 1966 else 1967 { 1968 unsigned char *sourceElement = sourceRow; 1969 unsigned char *destinationElement = destinationRow; 1970 1971 for(int x = 0; x < width; x++) 1972 { 1973 Color<float> color = source.read(sourceElement); 1974 destination.write(destinationElement, color); 1975 1976 sourceElement += source.bytes; 1977 destinationElement += destination.bytes; 1978 } 1979 } 1980 1981 sourceRow += source.pitchB; 1982 destinationRow += destination.pitchB; 1983 } 1984 1985 sourceSlice += source.sliceB; 1986 destinationSlice += destination.sliceB; 1987 } 1988 1989 source.unlockRect(); 1990 destination.unlockRect(); 1991 } 1992 decodeR8G8B8(Buffer & destination,Buffer & source)1993 void Surface::decodeR8G8B8(Buffer &destination, Buffer &source) 1994 { 1995 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1996 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1997 1998 int depth = min(destination.depth, source.depth); 1999 int height = min(destination.height, source.height); 2000 int width = min(destination.width, source.width); 2001 2002 for(int z = 0; z < depth; z++) 2003 { 2004 unsigned char *sourceRow = sourceSlice; 2005 unsigned char *destinationRow = destinationSlice; 2006 2007 for(int y = 0; y < height; y++) 2008 { 2009 unsigned char *sourceElement = sourceRow; 2010 unsigned char *destinationElement = destinationRow; 2011 2012 for(int x = 0; x < width; x++) 2013 { 2014 unsigned int b = sourceElement[0]; 2015 unsigned int g = sourceElement[1]; 2016 unsigned int r = sourceElement[2]; 2017 2018 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0); 2019 2020 sourceElement += source.bytes; 2021 destinationElement += destination.bytes; 2022 } 2023 2024 sourceRow += source.pitchB; 2025 destinationRow += destination.pitchB; 2026 } 2027 2028 sourceSlice += source.sliceB; 2029 destinationSlice += destination.sliceB; 2030 } 2031 2032 source.unlockRect(); 2033 destination.unlockRect(); 2034 } 2035 decodeX1R5G5B5(Buffer & destination,Buffer & source)2036 void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source) 2037 { 2038 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2039 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2040 2041 int depth = min(destination.depth, source.depth); 2042 int height = min(destination.height, source.height); 2043 int width = min(destination.width, source.width); 2044 2045 for(int z = 0; z < depth; z++) 2046 { 2047 unsigned char *sourceRow = sourceSlice; 2048 unsigned char *destinationRow = destinationSlice; 2049 2050 for(int y = 0; y < height; y++) 2051 { 2052 unsigned char *sourceElement = sourceRow; 2053 unsigned char *destinationElement = destinationRow; 2054 2055 for(int x = 0; x < width; x++) 2056 { 2057 unsigned int xrgb = *(unsigned short*)sourceElement; 2058 2059 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 2060 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 2061 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8); 2062 2063 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 2064 2065 sourceElement += source.bytes; 
					destinationElement += destination.bytes;
				}

				sourceRow += source.pitchB;
				destinationRow += destination.pitchB;
			}

			sourceSlice += source.sliceB;
			destinationSlice += destination.sliceB;
		}

		source.unlockRect();
		destination.unlockRect();
	}

	void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source)
	{
		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);

		int depth = min(destination.depth, source.depth);
		int height = min(destination.height, source.height);
		int width = min(destination.width, source.width);

		for(int z = 0; z < depth; z++)
		{
			unsigned char *sourceRow = sourceSlice;
			unsigned char *destinationRow = destinationSlice;

			for(int y = 0; y < height; y++)
			{
				unsigned char *sourceElement = sourceRow;
				unsigned char *destinationElement = destinationRow;

				for(int x = 0; x < width; x++)
				{
					unsigned int argb = *(unsigned short*)sourceElement;

					unsigned int a = (argb & 0x8000) * 130560;
					unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
					unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
					unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);

					*(unsigned int*)destinationElement = a | r | g | b;

					sourceElement += source.bytes;
					destinationElement += destination.bytes;
				}

				sourceRow += source.pitchB;
				destinationRow += destination.pitchB;
			}

			sourceSlice += source.sliceB;
			destinationSlice += destination.sliceB;
		}

		source.unlockRect();
		destination.unlockRect();
	}

	void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source)
	{
		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);

		int depth = min(destination.depth, source.depth);
		int height = min(destination.height, source.height);
		int width = min(destination.width, source.width);

		for(int z = 0; z < depth; z++)
		{
			unsigned char *sourceRow = sourceSlice;
			unsigned char *destinationRow = destinationSlice;

			for(int y = 0; y < height; y++)
			{
				unsigned char *sourceElement = sourceRow;
				unsigned char *destinationElement = destinationRow;

				for(int x = 0; x < width; x++)
				{
					unsigned int xrgb = *(unsigned short*)sourceElement;

					unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
					unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
					unsigned int b = (xrgb & 0x000F) * 0x00000011;

					*(unsigned int*)destinationElement = 0xFF000000 | r | g | b;

					sourceElement += source.bytes;
					destinationElement += destination.bytes;
				}

				sourceRow += source.pitchB;
				destinationRow += destination.pitchB;
			}

			sourceSlice += source.sliceB;
			destinationSlice += destination.sliceB;
		}

		source.unlockRect();
		destination.unlockRect();
	}

	void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source)
	{
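		// Each 4-bit channel below is widened to 8 bits by multiplying the masked nibble by a
		// replicating constant (e.g. (argb & 0x000F) * 0x11 turns 0xF into 0xFF), which copies
		// the nibble into both halves of the destination byte.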
		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);

		int depth = min(destination.depth, source.depth);
		int height = min(destination.height, source.height);
		int width = min(destination.width, source.width);

		for(int z = 0; z < depth; z++)
		{
			unsigned char *sourceRow = sourceSlice;
			unsigned char *destinationRow = destinationSlice;

			for(int y = 0; y < height; y++)
			{
				unsigned char *sourceElement = sourceRow;
				unsigned char *destinationElement = destinationRow;

				for(int x = 0; x < width; x++)
				{
					unsigned int argb = *(unsigned short*)sourceElement;

					unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
					unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
					unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
					unsigned int b = (argb & 0x000F) * 0x00000011;

					*(unsigned int*)destinationElement = a | r | g | b;

					sourceElement += source.bytes;
					destinationElement += destination.bytes;
				}

				sourceRow += source.pitchB;
				destinationRow += destination.pitchB;
			}

			sourceSlice += source.sliceB;
			destinationSlice += destination.sliceB;
		}

		source.unlockRect();
		destination.unlockRect();
	}

	void Surface::decodeP8(Buffer &destination, Buffer &source)
	{
		unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
		unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);

		int depth = min(destination.depth, source.depth);
		int height = min(destination.height, source.height);
		int width = min(destination.width, source.width);

		for(int z = 0; z < depth; z++)
		{
			unsigned char *sourceRow = sourceSlice;
			unsigned char *destinationRow = destinationSlice;

			for(int y = 0; y < height; y++)
			{
				unsigned char *sourceElement = sourceRow;
				unsigned char *destinationElement = destinationRow;

				for(int x = 0; x < width; x++)
				{
					unsigned int abgr = palette[*(unsigned char*)sourceElement];

					unsigned int r = (abgr & 0x000000FF) << 16;
					unsigned int g = (abgr & 0x0000FF00) << 0;
					unsigned int b = (abgr & 0x00FF0000) >> 16;
					unsigned int a = (abgr & 0xFF000000) >> 0;

					*(unsigned int*)destinationElement = a | r | g | b;

					sourceElement += source.bytes;
					destinationElement += destination.bytes;
				}

				sourceRow += source.pitchB;
				destinationRow += destination.pitchB;
			}

			sourceSlice += source.sliceB;
			destinationSlice += destination.sliceB;
		}

		source.unlockRect();
		destination.unlockRect();
	}

	void Surface::decodeDXT1(Buffer &internal, Buffer &external)
	{
		unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE);
		const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY);

		for(int z = 0; z < external.depth; z++)
		{
			unsigned int *dest = destSlice;

			for(int y = 0; y < external.height; y += 4)
			{
				for(int x = 0; x < external.width; x += 4)
				{
					Color<byte> c[4];

					c[0] = source->c0;
					c[1] = source->c1;

					if(source->c0 >
source->c1) // No transparency 2283 { 2284 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2285 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2286 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2287 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2288 c[2].a = 0xFF; 2289 2290 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2291 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2292 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2293 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2294 c[3].a = 0xFF; 2295 } 2296 else // c3 transparent 2297 { 2298 // c2 = 1 / 2 * c0 + 1 / 2 * c1 2299 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2); 2300 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2); 2301 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2); 2302 c[2].a = 0xFF; 2303 2304 c[3].r = 0; 2305 c[3].g = 0; 2306 c[3].b = 0; 2307 c[3].a = 0; 2308 } 2309 2310 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2311 { 2312 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2313 { 2314 dest[(x + i) + (y + j) * internal.pitchP] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4]; 2315 } 2316 } 2317 2318 source++; 2319 } 2320 } 2321 2322 (byte*&)destSlice += internal.sliceB; 2323 } 2324 2325 external.unlockRect(); 2326 internal.unlockRect(); 2327 } 2328 decodeDXT3(Buffer & internal,Buffer & external)2329 void Surface::decodeDXT3(Buffer &internal, Buffer &external) 2330 { 2331 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2332 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY); 2333 2334 for(int z = 0; z < external.depth; z++) 2335 { 2336 unsigned int *dest = destSlice; 2337 2338 for(int y = 0; y < external.height; y += 4) 2339 { 2340 for(int x = 0; x < external.width; x += 4) 2341 { 2342 Color<byte> c[4]; 2343 2344 c[0] = source->c0; 2345 c[1] = source->c1; 2346 2347 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2348 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2349 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2350 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2351 2352 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2353 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2354 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2355 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2356 2357 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2358 { 2359 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2360 { 2361 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F; 2362 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24)); 2363 2364 dest[(x + i) + (y + j) * internal.pitchP] = color; 2365 } 2366 } 2367 2368 source++; 2369 } 2370 } 2371 2372 (byte*&)destSlice += internal.sliceB; 2373 } 2374 2375 external.unlockRect(); 2376 internal.unlockRect(); 2377 } 2378 decodeDXT5(Buffer & internal,Buffer & external)2379 void Surface::decodeDXT5(Buffer &internal, Buffer &external) 2380 { 2381 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2382 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY); 2383 2384 for(int z = 0; z < external.depth; z++) 2385 { 2386 unsigned int *dest = destSlice; 2387 2388 for(int y = 0; y < external.height; y += 4) 2389 { 2390 for(int x = 0; x < external.width; x += 4) 2391 { 2392 Color<byte> c[4]; 2393 2394 c[0] = source->c0; 2395 c[1] = source->c1; 2396 2397 // c2 = 2 / 3 * c0 + 1 / 3 * 
c1 2398 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2399 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2400 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2401 2402 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2403 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2404 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2405 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2406 2407 byte a[8]; 2408 2409 a[0] = source->a0; 2410 a[1] = source->a1; 2411 2412 if(a[0] > a[1]) 2413 { 2414 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7); 2415 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7); 2416 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7); 2417 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7); 2418 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7); 2419 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7); 2420 } 2421 else 2422 { 2423 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5); 2424 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5); 2425 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5); 2426 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5); 2427 a[6] = 0; 2428 a[7] = 0xFF; 2429 } 2430 2431 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2432 { 2433 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2434 { 2435 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24; 2436 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha; 2437 2438 dest[(x + i) + (y + j) * internal.pitchP] = color; 2439 } 2440 } 2441 2442 source++; 2443 } 2444 } 2445 2446 (byte*&)destSlice += internal.sliceB; 2447 } 2448 2449 external.unlockRect(); 2450 internal.unlockRect(); 2451 } 2452 decodeATI1(Buffer & internal,Buffer & external)2453 void Surface::decodeATI1(Buffer &internal, Buffer &external) 2454 { 2455 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2456 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY); 2457 2458 for(int z = 0; z < external.depth; z++) 2459 { 2460 byte *dest = destSlice; 2461 2462 for(int y = 0; y < external.height; y += 4) 2463 { 2464 for(int x = 0; x < external.width; x += 4) 2465 { 2466 byte r[8]; 2467 2468 r[0] = source->r0; 2469 r[1] = source->r1; 2470 2471 if(r[0] > r[1]) 2472 { 2473 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7); 2474 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7); 2475 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7); 2476 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7); 2477 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7); 2478 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7); 2479 } 2480 else 2481 { 2482 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5); 2483 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5); 2484 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5); 2485 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5); 2486 r[6] = 0; 2487 r[7] = 0xFF; 2488 } 2489 2490 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2491 { 2492 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2493 { 2494 dest[(x + i) + (y + j) * internal.pitchP] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8]; 2495 } 2496 } 2497 2498 source++; 2499 } 2500 } 2501 2502 destSlice += internal.sliceB; 2503 } 2504 2505 external.unlockRect(); 2506 internal.unlockRect(); 2507 } 2508 decodeATI2(Buffer & internal,Buffer & 
external)2509 void Surface::decodeATI2(Buffer &internal, Buffer &external) 2510 { 2511 word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2512 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY); 2513 2514 for(int z = 0; z < external.depth; z++) 2515 { 2516 word *dest = destSlice; 2517 2518 for(int y = 0; y < external.height; y += 4) 2519 { 2520 for(int x = 0; x < external.width; x += 4) 2521 { 2522 byte X[8]; 2523 2524 X[0] = source->x0; 2525 X[1] = source->x1; 2526 2527 if(X[0] > X[1]) 2528 { 2529 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7); 2530 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7); 2531 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7); 2532 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7); 2533 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7); 2534 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7); 2535 } 2536 else 2537 { 2538 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5); 2539 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5); 2540 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5); 2541 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5); 2542 X[6] = 0; 2543 X[7] = 0xFF; 2544 } 2545 2546 byte Y[8]; 2547 2548 Y[0] = source->y0; 2549 Y[1] = source->y1; 2550 2551 if(Y[0] > Y[1]) 2552 { 2553 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7); 2554 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7); 2555 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7); 2556 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7); 2557 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7); 2558 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7); 2559 } 2560 else 2561 { 2562 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5); 2563 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5); 2564 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5); 2565 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5); 2566 Y[6] = 0; 2567 Y[7] = 0xFF; 2568 } 2569 2570 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2571 { 2572 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2573 { 2574 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8]; 2575 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8]; 2576 2577 dest[(x + i) + (y + j) * internal.pitchP] = (g << 8) + r; 2578 } 2579 } 2580 2581 source++; 2582 } 2583 } 2584 2585 (byte*&)destSlice += internal.sliceB; 2586 } 2587 2588 external.unlockRect(); 2589 internal.unlockRect(); 2590 } 2591 decodeETC2(Buffer & internal,Buffer & external,int nbAlphaBits,bool isSRGB)2592 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB) 2593 { 2594 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2595 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? 
		                    ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
		external.unlockRect();
		internal.unlockRect();

		if(isSRGB)
		{
			static byte sRGBtoLinearTable[256];
			static bool sRGBtoLinearTableDirty = true;
			if(sRGBtoLinearTableDirty)
			{
				for(int i = 0; i < 256; i++)
				{
					sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
				}
				sRGBtoLinearTableDirty = false;
			}

			// Perform sRGB conversion in place after decoding
			byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
			for(int y = 0; y < internal.height; y++)
			{
				byte *srcRow = src + y * internal.pitchB;
				for(int x = 0; x < internal.width; x++)
				{
					byte *srcPix = srcRow + x * internal.bytes;
					for(int i = 0; i < 3; i++)
					{
						srcPix[i] = sRGBtoLinearTable[srcPix[i]];
					}
				}
			}
			internal.unlockRect();
		}
	}

	void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
	{
		ASSERT(nbChannels == 1 || nbChannels == 2);

		byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
		ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
		                    (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
		external.unlockRect();

		// FIXME: We convert EAC data to float, until signed short internal formats are supported
		// This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats
		const float normalization = isSigned ? (1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f));
		for(int y = 0; y < internal.height; y++)
		{
			byte* srcRow = src + y * internal.pitchB;
			for(int x = internal.width - 1; x >= 0; x--)
			{
				int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes);
				float* dstPix = reinterpret_cast<float*>(srcPix);
				for(int c = nbChannels - 1; c >= 0; c--)
				{
					dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
				}
			}
		}

		internal.unlockRect();
	}

	void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
	{
	}

	size_t Surface::size(int width, int height, int depth, int border, int samples, Format format)
	{
		samples = max(1, samples);

		switch(format)
		{
		default:
			{
				uint64_t size = (uint64_t)sliceB(width, height, border, format, true) * depth * samples;

				// We can only sample buffers smaller than 2 GiB, due to signed 32-bit offset calculations.
				// Force an out-of-memory if larger, or let the caller report an error.
				if(size >= 0x80000000u)
				{
					return std::numeric_limits<size_t>::max();
				}

				// Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
				// and stencil operations also read 8 bytes per four 8-bit stencil values,
				// so we have to allocate 4 extra bytes to avoid buffer overruns.
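				// In both cases the wide access begins no more than 4 bytes before the end of the
				// addressed data, so 4 bytes of padding bound the worst-case overread.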
				// TODO(b/145229887): Eliminate if possible, or don't hard-code.
				return size + 4;
			}
		case FORMAT_YV12_BT601:
		case FORMAT_YV12_BT709:
		case FORMAT_YV12_JFIF:
			{
				width += 2 * border;
				height += 2 * border;

				size_t YStride = align<16>(width);
				size_t YSize = YStride * height;
				size_t CStride = align<16>(YStride / 2);
				size_t CSize = CStride * height / 2;

				return YSize + 2 * CSize;
			}
		}
	}

	bool Surface::isStencil(Format format)
	{
		switch(format)
		{
		case FORMAT_D32:
		case FORMAT_D16:
		case FORMAT_D24X8:
		case FORMAT_D32F:
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32F_SHADOW:
			return false;
		case FORMAT_D24S8:
		case FORMAT_D24FS8:
		case FORMAT_S8:
		case FORMAT_DF24S8:
		case FORMAT_DF16S8:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32FS8_SHADOW:
		case FORMAT_D32FS8:
		case FORMAT_D32FS8_COMPLEMENTARY:
		case FORMAT_INTZ:
			return true;
		default:
			return false;
		}
	}

	bool Surface::isDepth(Format format)
	{
		switch(format)
		{
		case FORMAT_D32:
		case FORMAT_D16:
		case FORMAT_D24X8:
		case FORMAT_D24S8:
		case FORMAT_D24FS8:
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_DF24S8:
		case FORMAT_DF16S8:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
		case FORMAT_INTZ:
			return true;
		case FORMAT_S8:
			return false;
		default:
			return false;
		}
	}

	bool Surface::hasQuadLayout(Format format)
	{
		switch(format)
		{
		case FORMAT_D32:
		case FORMAT_D16:
		case FORMAT_D24X8:
		case FORMAT_D24S8:
		case FORMAT_D24FS8:
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
		case FORMAT_DF24S8:
		case FORMAT_DF16S8:
		case FORMAT_INTZ:
		case FORMAT_S8:
		case FORMAT_A8G8R8B8Q:
		case FORMAT_X8G8R8B8Q:
			return true;
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
		default:
			break;
		}

		return false;
	}

	bool Surface::isPalette(Format format)
	{
		switch(format)
		{
		case FORMAT_P8:
		case FORMAT_A8P8:
			return true;
		default:
			return false;
		}
	}

	bool Surface::isFloatFormat(Format format)
	{
		switch(format)
		{
		case FORMAT_R5G6B5:
		case FORMAT_R8G8B8:
		case FORMAT_B8G8R8:
		case FORMAT_X8R8G8B8:
		case FORMAT_X8B8G8R8I:
		case FORMAT_X8B8G8R8:
		case FORMAT_A8R8G8B8:
		case FORMAT_SRGB8_X8:
		case FORMAT_SRGB8_A8:
		case FORMAT_A8B8G8R8I:
		case FORMAT_R8UI:
		case FORMAT_G8R8UI:
		case FORMAT_X8B8G8R8UI:
		case FORMAT_A8B8G8R8UI:
		case FORMAT_A8B8G8R8:
		case FORMAT_G8R8I:
		case FORMAT_G8R8:
		case FORMAT_A2B10G10R10:
		case FORMAT_A2B10G10R10UI:
		case FORMAT_R8_SNORM:
		case FORMAT_G8R8_SNORM:
		case FORMAT_X8B8G8R8_SNORM:
		case FORMAT_A8B8G8R8_SNORM:
		case FORMAT_R16I:
		case FORMAT_R16UI:
		case FORMAT_G16R16I:
		case FORMAT_G16R16UI:
2833 case FORMAT_G16R16: 2834 case FORMAT_X16B16G16R16I: 2835 case FORMAT_X16B16G16R16UI: 2836 case FORMAT_A16B16G16R16I: 2837 case FORMAT_A16B16G16R16UI: 2838 case FORMAT_A16B16G16R16: 2839 case FORMAT_V8U8: 2840 case FORMAT_Q8W8V8U8: 2841 case FORMAT_X8L8V8U8: 2842 case FORMAT_V16U16: 2843 case FORMAT_A16W16V16U16: 2844 case FORMAT_Q16W16V16U16: 2845 case FORMAT_A8: 2846 case FORMAT_R8I: 2847 case FORMAT_R8: 2848 case FORMAT_S8: 2849 case FORMAT_L8: 2850 case FORMAT_L16: 2851 case FORMAT_A8L8: 2852 case FORMAT_YV12_BT601: 2853 case FORMAT_YV12_BT709: 2854 case FORMAT_YV12_JFIF: 2855 case FORMAT_R32I: 2856 case FORMAT_R32UI: 2857 case FORMAT_G32R32I: 2858 case FORMAT_G32R32UI: 2859 case FORMAT_X32B32G32R32I: 2860 case FORMAT_X32B32G32R32UI: 2861 case FORMAT_A32B32G32R32I: 2862 case FORMAT_A32B32G32R32UI: 2863 return false; 2864 case FORMAT_R16F: 2865 case FORMAT_G16R16F: 2866 case FORMAT_B16G16R16F: 2867 case FORMAT_X16B16G16R16F: 2868 case FORMAT_A16B16G16R16F: 2869 case FORMAT_X16B16G16R16F_UNSIGNED: 2870 case FORMAT_R32F: 2871 case FORMAT_G32R32F: 2872 case FORMAT_B32G32R32F: 2873 case FORMAT_X32B32G32R32F: 2874 case FORMAT_A32B32G32R32F: 2875 case FORMAT_X32B32G32R32F_UNSIGNED: 2876 case FORMAT_D32F: 2877 case FORMAT_D32FS8: 2878 case FORMAT_D32F_COMPLEMENTARY: 2879 case FORMAT_D32FS8_COMPLEMENTARY: 2880 case FORMAT_D32F_LOCKABLE: 2881 case FORMAT_D32FS8_TEXTURE: 2882 case FORMAT_D32F_SHADOW: 2883 case FORMAT_D32FS8_SHADOW: 2884 case FORMAT_L16F: 2885 case FORMAT_A16L16F: 2886 case FORMAT_L32F: 2887 case FORMAT_A32L32F: 2888 return true; 2889 default: 2890 ASSERT(false); 2891 } 2892 2893 return false; 2894 } 2895 isUnsignedComponent(Format format,int component)2896 bool Surface::isUnsignedComponent(Format format, int component) 2897 { 2898 switch(format) 2899 { 2900 case FORMAT_NULL: 2901 case FORMAT_R5G6B5: 2902 case FORMAT_R8G8B8: 2903 case FORMAT_B8G8R8: 2904 case FORMAT_X8R8G8B8: 2905 case FORMAT_X8B8G8R8: 2906 case FORMAT_A8R8G8B8: 2907 case FORMAT_A8B8G8R8: 2908 case FORMAT_SRGB8_X8: 2909 case FORMAT_SRGB8_A8: 2910 case FORMAT_G8R8: 2911 case FORMAT_A2B10G10R10: 2912 case FORMAT_A2B10G10R10UI: 2913 case FORMAT_R16UI: 2914 case FORMAT_G16R16: 2915 case FORMAT_G16R16UI: 2916 case FORMAT_X16B16G16R16UI: 2917 case FORMAT_A16B16G16R16: 2918 case FORMAT_A16B16G16R16UI: 2919 case FORMAT_R32UI: 2920 case FORMAT_G32R32UI: 2921 case FORMAT_X32B32G32R32UI: 2922 case FORMAT_A32B32G32R32UI: 2923 case FORMAT_X32B32G32R32F_UNSIGNED: 2924 case FORMAT_R8UI: 2925 case FORMAT_G8R8UI: 2926 case FORMAT_X8B8G8R8UI: 2927 case FORMAT_A8B8G8R8UI: 2928 case FORMAT_D32F: 2929 case FORMAT_D32FS8: 2930 case FORMAT_D32F_COMPLEMENTARY: 2931 case FORMAT_D32FS8_COMPLEMENTARY: 2932 case FORMAT_D32F_LOCKABLE: 2933 case FORMAT_D32FS8_TEXTURE: 2934 case FORMAT_D32F_SHADOW: 2935 case FORMAT_D32FS8_SHADOW: 2936 case FORMAT_A8: 2937 case FORMAT_R8: 2938 case FORMAT_L8: 2939 case FORMAT_L16: 2940 case FORMAT_A8L8: 2941 case FORMAT_YV12_BT601: 2942 case FORMAT_YV12_BT709: 2943 case FORMAT_YV12_JFIF: 2944 return true; 2945 case FORMAT_A8B8G8R8I: 2946 case FORMAT_A16B16G16R16I: 2947 case FORMAT_A32B32G32R32I: 2948 case FORMAT_A8B8G8R8_SNORM: 2949 case FORMAT_Q8W8V8U8: 2950 case FORMAT_Q16W16V16U16: 2951 case FORMAT_A32B32G32R32F: 2952 return false; 2953 case FORMAT_R32F: 2954 case FORMAT_R8I: 2955 case FORMAT_R16I: 2956 case FORMAT_R32I: 2957 case FORMAT_R8_SNORM: 2958 return component >= 1; 2959 case FORMAT_V8U8: 2960 case FORMAT_X8L8V8U8: 2961 case FORMAT_V16U16: 2962 case FORMAT_G32R32F: 2963 case FORMAT_G8R8I: 2964 
case FORMAT_G16R16I: 2965 case FORMAT_G32R32I: 2966 case FORMAT_G8R8_SNORM: 2967 return component >= 2; 2968 case FORMAT_A16W16V16U16: 2969 case FORMAT_B32G32R32F: 2970 case FORMAT_X32B32G32R32F: 2971 case FORMAT_X8B8G8R8I: 2972 case FORMAT_X16B16G16R16I: 2973 case FORMAT_X32B32G32R32I: 2974 case FORMAT_X8B8G8R8_SNORM: 2975 return component >= 3; 2976 default: 2977 ASSERT(false); 2978 } 2979 2980 return false; 2981 } 2982 isSRGBreadable(Format format)2983 bool Surface::isSRGBreadable(Format format) 2984 { 2985 // Keep in sync with Capabilities::isSRGBreadable 2986 switch(format) 2987 { 2988 case FORMAT_L8: 2989 case FORMAT_A8L8: 2990 case FORMAT_R8G8B8: 2991 case FORMAT_A8R8G8B8: 2992 case FORMAT_X8R8G8B8: 2993 case FORMAT_A8B8G8R8: 2994 case FORMAT_X8B8G8R8: 2995 case FORMAT_SRGB8_X8: 2996 case FORMAT_SRGB8_A8: 2997 case FORMAT_R5G6B5: 2998 case FORMAT_X1R5G5B5: 2999 case FORMAT_A1R5G5B5: 3000 case FORMAT_A4R4G4B4: 3001 case FORMAT_DXT1: 3002 case FORMAT_DXT3: 3003 case FORMAT_DXT5: 3004 case FORMAT_ATI1: 3005 case FORMAT_ATI2: 3006 return true; 3007 default: 3008 return false; 3009 } 3010 } 3011 isSRGBwritable(Format format)3012 bool Surface::isSRGBwritable(Format format) 3013 { 3014 // Keep in sync with Capabilities::isSRGBwritable 3015 switch(format) 3016 { 3017 case FORMAT_NULL: 3018 case FORMAT_A8R8G8B8: 3019 case FORMAT_X8R8G8B8: 3020 case FORMAT_A8B8G8R8: 3021 case FORMAT_X8B8G8R8: 3022 case FORMAT_SRGB8_X8: 3023 case FORMAT_SRGB8_A8: 3024 case FORMAT_R5G6B5: 3025 return true; 3026 default: 3027 return false; 3028 } 3029 } 3030 isSRGBformat(Format format)3031 bool Surface::isSRGBformat(Format format) 3032 { 3033 switch(format) 3034 { 3035 case FORMAT_SRGB8_X8: 3036 case FORMAT_SRGB8_A8: 3037 return true; 3038 default: 3039 return false; 3040 } 3041 } 3042 isCompressed(Format format)3043 bool Surface::isCompressed(Format format) 3044 { 3045 switch(format) 3046 { 3047 case FORMAT_DXT1: 3048 case FORMAT_DXT3: 3049 case FORMAT_DXT5: 3050 case FORMAT_ATI1: 3051 case FORMAT_ATI2: 3052 case FORMAT_ETC1: 3053 case FORMAT_R11_EAC: 3054 case FORMAT_SIGNED_R11_EAC: 3055 case FORMAT_RG11_EAC: 3056 case FORMAT_SIGNED_RG11_EAC: 3057 case FORMAT_RGB8_ETC2: 3058 case FORMAT_SRGB8_ETC2: 3059 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3060 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3061 case FORMAT_RGBA8_ETC2_EAC: 3062 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3063 case FORMAT_RGBA_ASTC_4x4_KHR: 3064 case FORMAT_RGBA_ASTC_5x4_KHR: 3065 case FORMAT_RGBA_ASTC_5x5_KHR: 3066 case FORMAT_RGBA_ASTC_6x5_KHR: 3067 case FORMAT_RGBA_ASTC_6x6_KHR: 3068 case FORMAT_RGBA_ASTC_8x5_KHR: 3069 case FORMAT_RGBA_ASTC_8x6_KHR: 3070 case FORMAT_RGBA_ASTC_8x8_KHR: 3071 case FORMAT_RGBA_ASTC_10x5_KHR: 3072 case FORMAT_RGBA_ASTC_10x6_KHR: 3073 case FORMAT_RGBA_ASTC_10x8_KHR: 3074 case FORMAT_RGBA_ASTC_10x10_KHR: 3075 case FORMAT_RGBA_ASTC_12x10_KHR: 3076 case FORMAT_RGBA_ASTC_12x12_KHR: 3077 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3078 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3079 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3080 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3081 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3082 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3083 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3084 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3085 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3086 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3087 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3088 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3089 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3090 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3091 return true; 3092 
default: 3093 return false; 3094 } 3095 } 3096 isSignedNonNormalizedInteger(Format format)3097 bool Surface::isSignedNonNormalizedInteger(Format format) 3098 { 3099 switch(format) 3100 { 3101 case FORMAT_A8B8G8R8I: 3102 case FORMAT_X8B8G8R8I: 3103 case FORMAT_G8R8I: 3104 case FORMAT_R8I: 3105 case FORMAT_A16B16G16R16I: 3106 case FORMAT_X16B16G16R16I: 3107 case FORMAT_G16R16I: 3108 case FORMAT_R16I: 3109 case FORMAT_A32B32G32R32I: 3110 case FORMAT_X32B32G32R32I: 3111 case FORMAT_G32R32I: 3112 case FORMAT_R32I: 3113 return true; 3114 default: 3115 return false; 3116 } 3117 } 3118 isUnsignedNonNormalizedInteger(Format format)3119 bool Surface::isUnsignedNonNormalizedInteger(Format format) 3120 { 3121 switch(format) 3122 { 3123 case FORMAT_A8B8G8R8UI: 3124 case FORMAT_X8B8G8R8UI: 3125 case FORMAT_G8R8UI: 3126 case FORMAT_R8UI: 3127 case FORMAT_A16B16G16R16UI: 3128 case FORMAT_X16B16G16R16UI: 3129 case FORMAT_G16R16UI: 3130 case FORMAT_R16UI: 3131 case FORMAT_A32B32G32R32UI: 3132 case FORMAT_X32B32G32R32UI: 3133 case FORMAT_G32R32UI: 3134 case FORMAT_R32UI: 3135 return true; 3136 default: 3137 return false; 3138 } 3139 } 3140 isNonNormalizedInteger(Format format)3141 bool Surface::isNonNormalizedInteger(Format format) 3142 { 3143 return isSignedNonNormalizedInteger(format) || 3144 isUnsignedNonNormalizedInteger(format); 3145 } 3146 isNormalizedInteger(Format format)3147 bool Surface::isNormalizedInteger(Format format) 3148 { 3149 return !isFloatFormat(format) && 3150 !isNonNormalizedInteger(format) && 3151 !isCompressed(format) && 3152 !isDepth(format) && 3153 !isStencil(format); 3154 } 3155 componentCount(Format format)3156 int Surface::componentCount(Format format) 3157 { 3158 switch(format) 3159 { 3160 case FORMAT_R5G6B5: return 3; 3161 case FORMAT_X8R8G8B8: return 3; 3162 case FORMAT_X8B8G8R8I: return 3; 3163 case FORMAT_X8B8G8R8: return 3; 3164 case FORMAT_A8R8G8B8: return 4; 3165 case FORMAT_SRGB8_X8: return 3; 3166 case FORMAT_SRGB8_A8: return 4; 3167 case FORMAT_A8B8G8R8I: return 4; 3168 case FORMAT_A8B8G8R8: return 4; 3169 case FORMAT_G8R8I: return 2; 3170 case FORMAT_G8R8: return 2; 3171 case FORMAT_R8_SNORM: return 1; 3172 case FORMAT_G8R8_SNORM: return 2; 3173 case FORMAT_X8B8G8R8_SNORM:return 3; 3174 case FORMAT_A8B8G8R8_SNORM:return 4; 3175 case FORMAT_R8UI: return 1; 3176 case FORMAT_G8R8UI: return 2; 3177 case FORMAT_X8B8G8R8UI: return 3; 3178 case FORMAT_A8B8G8R8UI: return 4; 3179 case FORMAT_A2B10G10R10: return 4; 3180 case FORMAT_A2B10G10R10UI: return 4; 3181 case FORMAT_G16R16I: return 2; 3182 case FORMAT_G16R16UI: return 2; 3183 case FORMAT_G16R16: return 2; 3184 case FORMAT_G32R32I: return 2; 3185 case FORMAT_G32R32UI: return 2; 3186 case FORMAT_X16B16G16R16I: return 3; 3187 case FORMAT_X16B16G16R16UI: return 3; 3188 case FORMAT_A16B16G16R16I: return 4; 3189 case FORMAT_A16B16G16R16UI: return 4; 3190 case FORMAT_A16B16G16R16: return 4; 3191 case FORMAT_X32B32G32R32I: return 3; 3192 case FORMAT_X32B32G32R32UI: return 3; 3193 case FORMAT_A32B32G32R32I: return 4; 3194 case FORMAT_A32B32G32R32UI: return 4; 3195 case FORMAT_V8U8: return 2; 3196 case FORMAT_Q8W8V8U8: return 4; 3197 case FORMAT_X8L8V8U8: return 3; 3198 case FORMAT_V16U16: return 2; 3199 case FORMAT_A16W16V16U16: return 4; 3200 case FORMAT_Q16W16V16U16: return 4; 3201 case FORMAT_R32F: return 1; 3202 case FORMAT_G32R32F: return 2; 3203 case FORMAT_X32B32G32R32F: return 3; 3204 case FORMAT_A32B32G32R32F: return 4; 3205 case FORMAT_X32B32G32R32F_UNSIGNED: return 3; 3206 case FORMAT_D32F: return 1; 3207 case 
FORMAT_D32FS8: return 1; 3208 case FORMAT_D32F_LOCKABLE: return 1; 3209 case FORMAT_D32FS8_TEXTURE: return 1; 3210 case FORMAT_D32F_SHADOW: return 1; 3211 case FORMAT_D32FS8_SHADOW: return 1; 3212 case FORMAT_A8: return 1; 3213 case FORMAT_R8I: return 1; 3214 case FORMAT_R8: return 1; 3215 case FORMAT_R16I: return 1; 3216 case FORMAT_R16UI: return 1; 3217 case FORMAT_R32I: return 1; 3218 case FORMAT_R32UI: return 1; 3219 case FORMAT_L8: return 1; 3220 case FORMAT_L16: return 1; 3221 case FORMAT_A8L8: return 2; 3222 case FORMAT_YV12_BT601: return 3; 3223 case FORMAT_YV12_BT709: return 3; 3224 case FORMAT_YV12_JFIF: return 3; 3225 default: 3226 ASSERT(false); 3227 } 3228 3229 return 1; 3230 } 3231 allocateBuffer(int width,int height,int depth,int border,int samples,Format format)3232 void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format) 3233 { 3234 return allocate(size(width, height, depth, border, samples, format)); 3235 } 3236 memfill4(void * buffer,int pattern,int bytes)3237 void Surface::memfill4(void *buffer, int pattern, int bytes) 3238 { 3239 while((size_t)buffer & 0x1 && bytes >= 1) 3240 { 3241 *(char*)buffer = (char)pattern; 3242 (char*&)buffer += 1; 3243 bytes -= 1; 3244 } 3245 3246 while((size_t)buffer & 0x3 && bytes >= 2) 3247 { 3248 *(short*)buffer = (short)pattern; 3249 (short*&)buffer += 1; 3250 bytes -= 2; 3251 } 3252 3253 #if defined(__i386__) || defined(__x86_64__) 3254 if(CPUID::supportsSSE()) 3255 { 3256 while((size_t)buffer & 0xF && bytes >= 4) 3257 { 3258 *(int*)buffer = pattern; 3259 (int*&)buffer += 1; 3260 bytes -= 4; 3261 } 3262 3263 __m128 quad = _mm_set_ps1((float&)pattern); 3264 3265 float *pointer = (float*)buffer; 3266 int qxwords = bytes / 64; 3267 bytes -= qxwords * 64; 3268 3269 while(qxwords--) 3270 { 3271 _mm_stream_ps(pointer + 0, quad); 3272 _mm_stream_ps(pointer + 4, quad); 3273 _mm_stream_ps(pointer + 8, quad); 3274 _mm_stream_ps(pointer + 12, quad); 3275 3276 pointer += 16; 3277 } 3278 3279 buffer = pointer; 3280 } 3281 #endif 3282 3283 while(bytes >= 4) 3284 { 3285 *(int*)buffer = (int)pattern; 3286 (int*&)buffer += 1; 3287 bytes -= 4; 3288 } 3289 3290 while(bytes >= 2) 3291 { 3292 *(short*)buffer = (short)pattern; 3293 (short*&)buffer += 1; 3294 bytes -= 2; 3295 } 3296 3297 while(bytes >= 1) 3298 { 3299 *(char*)buffer = (char)pattern; 3300 (char*&)buffer += 1; 3301 bytes -= 1; 3302 } 3303 } 3304 sync()3305 void Surface::sync() 3306 { 3307 resource->lock(EXCLUSIVE); 3308 resource->unlock(); 3309 } 3310 isEntire(const Rect & rect) const3311 bool Surface::isEntire(const Rect& rect) const 3312 { 3313 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1); 3314 } 3315 getRect() const3316 Rect Surface::getRect() const 3317 { 3318 return Rect(0, 0, internal.width, internal.height); 3319 } 3320 clearDepth(float depth,int x0,int y0,int width,int height)3321 void Surface::clearDepth(float depth, int x0, int y0, int width, int height) 3322 { 3323 if(width == 0 || height == 0) 3324 { 3325 return; 3326 } 3327 3328 if(internal.format == FORMAT_NULL) 3329 { 3330 return; 3331 } 3332 3333 // Not overlapping 3334 if(x0 > internal.width) return; 3335 if(y0 > internal.height) return; 3336 if(x0 + width < 0) return; 3337 if(y0 + height < 0) return; 3338 3339 // Clip against dimensions 3340 if(x0 < 0) {width += x0; x0 = 0;} 3341 if(x0 + width > internal.width) width = internal.width - x0; 3342 if(y0 < 0) {height += y0; y0 = 0;} 3343 if(y0 + height > 
internal.height) height = internal.height - y0; 3344 3345 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 3346 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY; 3347 3348 int x1 = x0 + width; 3349 int y1 = y0 + height; 3350 3351 if(!hasQuadLayout(internal.format)) 3352 { 3353 float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC); 3354 3355 for(int z = 0; z < internal.samples; z++) 3356 { 3357 float *row = target; 3358 for(int y = y0; y < y1; y++) 3359 { 3360 memfill4(row, (int&)depth, width * sizeof(float)); 3361 row += internal.pitchP; 3362 } 3363 target += internal.sliceP; 3364 } 3365 3366 unlockInternal(); 3367 } 3368 else // Quad layout 3369 { 3370 if(complementaryDepthBuffer) 3371 { 3372 depth = 1 - depth; 3373 } 3374 3375 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC); 3376 3377 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3378 int oddX1 = (x1 & ~1) * 2; 3379 int evenX0 = ((x0 + 1) & ~1) * 2; 3380 int evenBytes = (oddX1 - evenX0) * sizeof(float); 3381 3382 for(int z = 0; z < internal.samples; z++) 3383 { 3384 for(int y = y0; y < y1; y++) 3385 { 3386 float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2; 3387 3388 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 3389 { 3390 if((x0 & 1) != 0) 3391 { 3392 target[oddX0 + 0] = depth; 3393 target[oddX0 + 2] = depth; 3394 } 3395 3396 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4) 3397 // { 3398 // target[x2 + 0] = depth; 3399 // target[x2 + 1] = depth; 3400 // target[x2 + 2] = depth; 3401 // target[x2 + 3] = depth; 3402 // } 3403 3404 // __asm 3405 // { 3406 // movss xmm0, depth 3407 // shufps xmm0, xmm0, 0x00 3408 // 3409 // mov eax, x0 3410 // add eax, 1 3411 // and eax, 0xFFFFFFFE 3412 // cmp eax, x1 3413 // jge qEnd 3414 // 3415 // mov edi, target 3416 // 3417 // qLoop: 3418 // movntps [edi+8*eax], xmm0 3419 // 3420 // add eax, 2 3421 // cmp eax, x1 3422 // jl qLoop 3423 // qEnd: 3424 // } 3425 3426 memfill4(&target[evenX0], (int&)depth, evenBytes); 3427 3428 if((x1 & 1) != 0) 3429 { 3430 target[oddX1 + 0] = depth; 3431 target[oddX1 + 2] = depth; 3432 } 3433 3434 y++; 3435 } 3436 else 3437 { 3438 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3439 { 3440 target[i] = depth; 3441 } 3442 } 3443 } 3444 3445 buffer += internal.sliceP; 3446 } 3447 3448 unlockInternal(); 3449 } 3450 } 3451 clearStencil(unsigned char s,unsigned char mask,int x0,int y0,int width,int height)3452 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3453 { 3454 if(mask == 0 || width == 0 || height == 0) 3455 { 3456 return; 3457 } 3458 3459 if(stencil.format == FORMAT_NULL) 3460 { 3461 return; 3462 } 3463 3464 // Not overlapping 3465 if(x0 > internal.width) return; 3466 if(y0 > internal.height) return; 3467 if(x0 + width < 0) return; 3468 if(y0 + height < 0) return; 3469 3470 // Clip against dimensions 3471 if(x0 < 0) {width += x0; x0 = 0;} 3472 if(x0 + width > internal.width) width = internal.width - x0; 3473 if(y0 < 0) {height += y0; y0 = 0;} 3474 if(y0 + height > internal.height) height = internal.height - y0; 3475 3476 int x1 = x0 + width; 3477 int y1 = y0 + height; 3478 3479 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3480 int oddX1 = (x1 & ~1) * 2; 3481 int evenX0 = ((x0 + 1) & ~1) * 2; 3482 int evenBytes = oddX1 - evenX0; 3483 3484 unsigned char maskedS = s & mask; 3485 unsigned char invMask = ~mask; 3486 unsigned int fill = maskedS; 3487 fill = fill | (fill << 8) | (fill << 16) | (fill << 24); 3488 3489 
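		// The masked stencil value was replicated into all four bytes of 'fill' above, so that
		// memfill4() can store it a whole 32-bit word at a time along fully covered quad rows.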
		char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC);

		// Stencil buffers are assumed to use quad layout
		for(int z = 0; z < stencil.samples; z++)
		{
			for(int y = y0; y < y1; y++)
			{
				char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2;

				if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF)   // Fill quad line at once
				{
					if((x0 & 1) != 0)
					{
						target[oddX0 + 0] = fill;
						target[oddX0 + 2] = fill;
					}

					memfill4(&target[evenX0], fill, evenBytes);

					if((x1 & 1) != 0)
					{
						target[oddX1 + 0] = fill;
						target[oddX1 + 2] = fill;
					}

					y++;
				}
				else
				{
					for(int x = x0; x < x1; x++)
					{
						int i = (x & ~1) * 2 + (x & 1);
						target[i] = maskedS | (target[i] & invMask);
					}
				}
			}

			buffer += stencil.sliceP;
		}

		unlockStencil();
	}

	void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height)
	{
		unsigned char *row;
		Buffer *buffer;

		if(internal.dirty)
		{
			row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
			buffer = &internal;
		}
		else
		{
			row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC);
			buffer = &external;
		}

		if(buffer->bytes <= 4)
		{
			int c;
			buffer->write(&c, color);

			if(buffer->bytes <= 1) c = (c << 8)  | c;
			if(buffer->bytes <= 2) c = (c << 16) | c;

			for(int y = 0; y < height; y++)
			{
				memfill4(row, c, width * buffer->bytes);

				row += buffer->pitchB;
			}
		}
		else   // Generic
		{
			for(int y = 0; y < height; y++)
			{
				unsigned char *element = row;

				for(int x = 0; x < width; x++)
				{
					buffer->write(element, color);

					element += buffer->bytes;
				}

				row += buffer->pitchB;
			}
		}

		if(buffer == &internal)
		{
			unlockInternal();
		}
		else
		{
			unlockExternal();
		}
	}

	void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter)
	{
		ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);

		sw::Color<float> color;

		if(!filter)
		{
			color = source->internal.read((int)srcX, (int)srcY, 0);
		}
		else   // Bilinear filtering
		{
			color = source->internal.sample(srcX, srcY, 0);
		}

		internal.write(x, y, color);
	}

	void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter)
	{
		ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED);

		sw::Color<float> color;

		if(!filter)
		{
			color = source->internal.read((int)srcX, (int)srcY, int(srcZ));
		}
		else   // Bilinear filtering
		{
			color = source->internal.sample(srcX, srcY, srcZ);
		}

		internal.write(x, y, z, color);
	}

	void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge)
	{
		Surface *dst = this;

		// Figure out if the edges to be copied in reverse order respectively from one
void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge)
{
	Surface *dst = this;

	// Figure out whether the edges have to be copied in reverse order with respect to one another.
	// The copy should be reversed whenever the same edges are contiguous or if we're
	// copying top <-> right or bottom <-> left. This is explained by the layout, which is:
	//
	//      | +y |
	// | -x | +z | +x | -z |
	//      | -y |

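	// The loop further down copies srcW texels from the source edge into the destination border,
	// one texel at a time: srcStart/srcDelta and dstStart/dstDelta select the first texel and the
	// per-texel byte stride on each side. When 'reverse' is set, the destination stride is negated
	// so the edge is written back to front, keeping texel order consistent between adjacent faces.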
	bool reverse = (srcEdge == dstEdge) ||
	               ((srcEdge == TOP) && (dstEdge == RIGHT)) ||
	               ((srcEdge == RIGHT) && (dstEdge == TOP)) ||
	               ((srcEdge == BOTTOM) && (dstEdge == LEFT)) ||
	               ((srcEdge == LEFT) && (dstEdge == BOTTOM));

	int srcBytes = src->bytes(src->Surface::getInternalFormat());
	int srcPitch = src->getInternalPitchB();
	int dstBytes = dst->bytes(dst->Surface::getInternalFormat());
	int dstPitch = dst->getInternalPitchB();

	int srcW = src->getWidth();
	int srcH = src->getHeight();
	int dstW = dst->getWidth();
	int dstH = dst->getHeight();

	ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes);

	// Src is expressed in the regular [0, width-1], [0, height-1] space
	int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch;
	int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0));

	// Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space
	int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1);
	int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta);

	char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart;
	char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart;

	for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta)
	{
		memcpy(dstBuf, srcBuf, srcBytes);
	}

	if(dstEdge == LEFT || dstEdge == RIGHT)
	{
		// TOP and BOTTOM are already set, let's average out the corners
		int x0 = (dstEdge == RIGHT) ? dstW : -1;
		int y0 = -1;
		int x1 = (dstEdge == RIGHT) ? dstW - 1 : 0;
		int y1 = 0;
		dst->computeCubeCorner(x0, y0, x1, y1);
		y0 = dstH;
		y1 = dstH - 1;
		dst->computeCubeCorner(x0, y0, x1, y1);
	}

	src->unlockInternal();
	dst->unlockInternal();
}

void Surface::computeCubeCorner(int x0, int y0, int x1, int y1)
{
	ASSERT(internal.lock != LOCK_UNLOCKED);

	sw::Color<float> color = internal.read(x0, y1);
	color += internal.read(x1, y0);
	color += internal.read(x1, y1);
	color *= (1.0f / 3.0f);

	internal.write(x0, y0, color);
}

bool Surface::hasStencil() const
{
	return isStencil(external.format);
}

bool Surface::hasDepth() const
{
	return isDepth(external.format);
}

bool Surface::hasPalette() const
{
	return isPalette(external.format);
}

bool Surface::isRenderTarget() const
{
	return renderTarget;
}

bool Surface::hasDirtyContents() const
{
	return dirtyContents;
}

void Surface::markContentsClean()
{
	dirtyContents = false;
}

Resource *Surface::getResource()
{
	return resource;
}

bool Surface::identicalBuffers() const
{
	return external.format == internal.format &&
	       external.width == internal.width &&
	       external.height == internal.height &&
	       external.depth == internal.depth &&
	       external.pitchB == internal.pitchB &&
	       external.sliceB == internal.sliceB &&
	       external.border == internal.border &&
	       external.samples == internal.samples;
}

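// selectInternalFormat() maps the format a surface was created with (its 'external' view) to the
// representation used internally for rendering and sampling. Broadly: low-precision and palettized
// color formats are widened to at least 8 bits per channel, compressed formats are expanded to an
// uncompressed equivalent, and depth formats become 32-bit floating-point variants (shadow or
// complementary flavors depending on usage).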
Format Surface::selectInternalFormat(Format format) const
{
	switch(format)
	{
	case FORMAT_NULL:
		return FORMAT_NULL;
	case FORMAT_P8:
	case FORMAT_A8P8:
	case FORMAT_A4R4G4B4:
	case FORMAT_A1R5G5B5:
	case FORMAT_A8R3G3B2:
		return FORMAT_A8R8G8B8;
	case FORMAT_A8:
		return FORMAT_A8;
	case FORMAT_R8I:
		return FORMAT_R8I;
	case FORMAT_R8UI:
		return FORMAT_R8UI;
	case FORMAT_R8_SNORM:
		return FORMAT_R8_SNORM;
	case FORMAT_R8:
		return FORMAT_R8;
	case FORMAT_R16I:
		return FORMAT_R16I;
	case FORMAT_R16UI:
		return FORMAT_R16UI;
	case FORMAT_R32I:
		return FORMAT_R32I;
	case FORMAT_R32UI:
		return FORMAT_R32UI;
	case FORMAT_X16B16G16R16I:
		return FORMAT_X16B16G16R16I;
	case FORMAT_A16B16G16R16I:
		return FORMAT_A16B16G16R16I;
	case FORMAT_X16B16G16R16UI:
		return FORMAT_X16B16G16R16UI;
	case FORMAT_A16B16G16R16UI:
		return FORMAT_A16B16G16R16UI;
	case FORMAT_A2R10G10B10:
	case FORMAT_A2B10G10R10:
	case FORMAT_A16B16G16R16:
		return FORMAT_A16B16G16R16;
	case FORMAT_A2B10G10R10UI:
		return FORMAT_A16B16G16R16UI;
	case FORMAT_X32B32G32R32I:
		return FORMAT_X32B32G32R32I;
	case FORMAT_A32B32G32R32I:
		return FORMAT_A32B32G32R32I;
	case FORMAT_X32B32G32R32UI:
		return FORMAT_X32B32G32R32UI;
	case FORMAT_A32B32G32R32UI:
		return FORMAT_A32B32G32R32UI;
	case FORMAT_G8R8I:
		return FORMAT_G8R8I;
	case FORMAT_G8R8UI:
		return FORMAT_G8R8UI;
	case FORMAT_G8R8_SNORM:
		return FORMAT_G8R8_SNORM;
	case FORMAT_G8R8:
		return FORMAT_G8R8;
	case FORMAT_G16R16I:
		return FORMAT_G16R16I;
	case FORMAT_G16R16UI:
		return FORMAT_G16R16UI;
	case FORMAT_G16R16:
		return FORMAT_G16R16;
	case FORMAT_G32R32I:
		return FORMAT_G32R32I;
	case FORMAT_G32R32UI:
		return FORMAT_G32R32UI;
	case FORMAT_A8R8G8B8:
		if(lockable || !quadLayoutEnabled)
		{
			return FORMAT_A8R8G8B8;
		}
		else
		{
			return FORMAT_A8G8R8B8Q;
		}
	case FORMAT_A8B8G8R8I:
		return FORMAT_A8B8G8R8I;
	case FORMAT_A8B8G8R8UI:
		return FORMAT_A8B8G8R8UI;
	case FORMAT_A8B8G8R8_SNORM:
		return FORMAT_A8B8G8R8_SNORM;
	case FORMAT_R5G5B5A1:
	case FORMAT_R4G4B4A4:
	case FORMAT_A8B8G8R8:
		return FORMAT_A8B8G8R8;
	case FORMAT_R5G6B5:
		return FORMAT_R5G6B5;
	case FORMAT_R3G3B2:
	case FORMAT_R8G8B8:
	case FORMAT_X4R4G4B4:
	case FORMAT_X1R5G5B5:
	case FORMAT_X8R8G8B8:
		if(lockable || !quadLayoutEnabled)
		{
			return FORMAT_X8R8G8B8;
		}
		else
		{
			return FORMAT_X8G8R8B8Q;
		}
	case FORMAT_X8B8G8R8I:
		return FORMAT_X8B8G8R8I;
	case FORMAT_X8B8G8R8UI:
		return FORMAT_X8B8G8R8UI;
	case FORMAT_X8B8G8R8_SNORM:
		return FORMAT_X8B8G8R8_SNORM;
	case FORMAT_B8G8R8:
	case FORMAT_X8B8G8R8:
		return FORMAT_X8B8G8R8;
	case FORMAT_SRGB8_X8:
		return FORMAT_SRGB8_X8;
	case FORMAT_SRGB8_A8:
		return FORMAT_SRGB8_A8;
	// Compressed formats
	case FORMAT_DXT1:
	case FORMAT_DXT3:
	case FORMAT_DXT5:
	case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
	case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
	case FORMAT_RGBA8_ETC2_EAC:
	case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
	case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
		return FORMAT_A8R8G8B8;
	case FORMAT_RGBA_ASTC_4x4_KHR:
	case FORMAT_RGBA_ASTC_5x4_KHR:
	case FORMAT_RGBA_ASTC_5x5_KHR:
	case FORMAT_RGBA_ASTC_6x5_KHR:
	case FORMAT_RGBA_ASTC_6x6_KHR:
	case FORMAT_RGBA_ASTC_8x5_KHR:
	case FORMAT_RGBA_ASTC_8x6_KHR:
	case FORMAT_RGBA_ASTC_8x8_KHR:
	case FORMAT_RGBA_ASTC_10x5_KHR:
	case FORMAT_RGBA_ASTC_10x6_KHR:
	case FORMAT_RGBA_ASTC_10x8_KHR:
	case FORMAT_RGBA_ASTC_10x10_KHR:
	case FORMAT_RGBA_ASTC_12x10_KHR:
	case FORMAT_RGBA_ASTC_12x12_KHR:
		// ASTC supports HDR, so a floating point format is required to represent it properly
		return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported
	case FORMAT_ATI1:
		return FORMAT_R8;
	case FORMAT_R11_EAC:
	case FORMAT_SIGNED_R11_EAC:
		return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient
	case FORMAT_ATI2:
		return FORMAT_G8R8;
	case FORMAT_RG11_EAC:
	case FORMAT_SIGNED_RG11_EAC:
		return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient
	case FORMAT_ETC1:
	case FORMAT_RGB8_ETC2:
	case FORMAT_SRGB8_ETC2:
		return FORMAT_X8R8G8B8;
	// Bumpmap formats
	case FORMAT_V8U8:
		return FORMAT_V8U8;
	case FORMAT_L6V5U5:       return FORMAT_X8L8V8U8;
	case FORMAT_Q8W8V8U8:     return FORMAT_Q8W8V8U8;
	case FORMAT_X8L8V8U8:     return FORMAT_X8L8V8U8;
	case FORMAT_V16U16:       return FORMAT_V16U16;
	case FORMAT_A2W10V10U10:  return FORMAT_A16W16V16U16;
	case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16;
	// Floating-point formats
	case FORMAT_A16F:                   return FORMAT_A32B32G32R32F;
	case FORMAT_R16F:                   return FORMAT_R32F;
	case FORMAT_G16R16F:                return FORMAT_G32R32F;
	case FORMAT_B16G16R16F:             return FORMAT_X32B32G32R32F;
	case FORMAT_X16B16G16R16F:          return FORMAT_X32B32G32R32F;
	case FORMAT_A16B16G16R16F:          return FORMAT_A32B32G32R32F;
	case FORMAT_X16B16G16R16F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
	case FORMAT_A32F:                   return FORMAT_A32B32G32R32F;
	case FORMAT_R32F:                   return FORMAT_R32F;
	case FORMAT_G32R32F:                return FORMAT_G32R32F;
	case FORMAT_B32G32R32F:             return FORMAT_X32B32G32R32F;
	case FORMAT_X32B32G32R32F:          return FORMAT_X32B32G32R32F;
	case FORMAT_A32B32G32R32F:          return FORMAT_A32B32G32R32F;
	case FORMAT_X32B32G32R32F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED;
	// Luminance formats
	case FORMAT_L8:      return FORMAT_L8;
	case FORMAT_A4L4:    return FORMAT_A8L8;
	case FORMAT_L16:     return FORMAT_L16;
	case FORMAT_A8L8:    return FORMAT_A8L8;
	case FORMAT_L16F:    return FORMAT_X32B32G32R32F;
	case FORMAT_A16L16F: return FORMAT_A32B32G32R32F;
	case FORMAT_L32F:    return FORMAT_X32B32G32R32F;
	case FORMAT_A32L32F: return FORMAT_A32B32G32R32F;
	// Depth/stencil formats
	case FORMAT_D16:
	case FORMAT_D32:
	case FORMAT_D24X8:
		if(hasParent) // Texture
		{
			return FORMAT_D32F_SHADOW;
		}
		else if(complementaryDepthBuffer)
		{
			return FORMAT_D32F_COMPLEMENTARY;
		}
		else
		{
			return FORMAT_D32F;
		}
	case FORMAT_D24S8:
	case FORMAT_D24FS8:
		if(hasParent) // Texture
		{
			return FORMAT_D32FS8_SHADOW;
		}
		else if(complementaryDepthBuffer)
		{
			return FORMAT_D32FS8_COMPLEMENTARY;
		}
		else
		{
			return FORMAT_D32FS8;
		}
	case FORMAT_D32F:           return FORMAT_D32F;
	case FORMAT_D32FS8:         return FORMAT_D32FS8;
	case FORMAT_D32F_LOCKABLE:  return FORMAT_D32F_LOCKABLE;
	case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE;
	case FORMAT_INTZ:           return FORMAT_D32FS8_TEXTURE;
	case FORMAT_DF24S8:         return FORMAT_D32FS8_SHADOW;
	case FORMAT_DF16S8:         return FORMAT_D32FS8_SHADOW;
	case FORMAT_S8:             return FORMAT_S8;
	// YUV formats
	case FORMAT_YV12_BT601: return FORMAT_YV12_BT601;
	case FORMAT_YV12_BT709: return FORMAT_YV12_BT709;
	case FORMAT_YV12_JFIF:  return FORMAT_YV12_JFIF;
	default:
		ASSERT(false);
	}

	return FORMAT_NULL;
}

void Surface::setTexturePalette(unsigned int *palette)
{
	Surface::palette = palette;
	Surface::paletteID++;
}

// resolve() averages the samples of a multisampled color buffer into sample 0, in place.
void Surface::resolve()
{
	if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL)
	{
		return;
	}

	ASSERT(internal.depth == 1); // Unimplemented

	void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE);

	int width = internal.width;
	int height = internal.height;
	int pitch = internal.pitchB;
	int slice = internal.sliceB;

	unsigned char *source0 = (unsigned char*)source;
	unsigned char *source1 =
source0 + slice; 4023 unsigned char *source2 = source1 + slice; 4024 unsigned char *source3 = source2 + slice; 4025 unsigned char *source4 = source3 + slice; 4026 unsigned char *source5 = source4 + slice; 4027 unsigned char *source6 = source5 + slice; 4028 unsigned char *source7 = source6 + slice; 4029 unsigned char *source8 = source7 + slice; 4030 unsigned char *source9 = source8 + slice; 4031 unsigned char *sourceA = source9 + slice; 4032 unsigned char *sourceB = sourceA + slice; 4033 unsigned char *sourceC = sourceB + slice; 4034 unsigned char *sourceD = sourceC + slice; 4035 unsigned char *sourceE = sourceD + slice; 4036 unsigned char *sourceF = sourceE + slice; 4037 4038 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || 4039 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 || 4040 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8) 4041 { 4042 #if defined(__i386__) || defined(__x86_64__) 4043 if(CPUID::supportsSSE2() && (width % 4) == 0) 4044 { 4045 if(internal.samples == 2) 4046 { 4047 for(int y = 0; y < height; y++) 4048 { 4049 for(int x = 0; x < width; x += 4) 4050 { 4051 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4052 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4053 4054 c0 = _mm_avg_epu8(c0, c1); 4055 4056 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4057 } 4058 4059 source0 += pitch; 4060 source1 += pitch; 4061 } 4062 } 4063 else if(internal.samples == 4) 4064 { 4065 for(int y = 0; y < height; y++) 4066 { 4067 for(int x = 0; x < width; x += 4) 4068 { 4069 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4070 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4071 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4072 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4073 4074 c0 = _mm_avg_epu8(c0, c1); 4075 c2 = _mm_avg_epu8(c2, c3); 4076 c0 = _mm_avg_epu8(c0, c2); 4077 4078 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4079 } 4080 4081 source0 += pitch; 4082 source1 += pitch; 4083 source2 += pitch; 4084 source3 += pitch; 4085 } 4086 } 4087 else if(internal.samples == 8) 4088 { 4089 for(int y = 0; y < height; y++) 4090 { 4091 for(int x = 0; x < width; x += 4) 4092 { 4093 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4094 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4095 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4096 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4097 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4098 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4099 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4100 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4101 4102 c0 = _mm_avg_epu8(c0, c1); 4103 c2 = _mm_avg_epu8(c2, c3); 4104 c4 = _mm_avg_epu8(c4, c5); 4105 c6 = _mm_avg_epu8(c6, c7); 4106 c0 = _mm_avg_epu8(c0, c2); 4107 c4 = _mm_avg_epu8(c4, c6); 4108 c0 = _mm_avg_epu8(c0, c4); 4109 4110 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4111 } 4112 4113 source0 += pitch; 4114 source1 += pitch; 4115 source2 += pitch; 4116 source3 += pitch; 4117 source4 += pitch; 4118 source5 += pitch; 4119 source6 += pitch; 4120 source7 += pitch; 4121 } 4122 } 4123 else if(internal.samples == 16) 4124 { 4125 for(int y = 0; y < height; y++) 4126 { 4127 for(int x = 0; x < width; x += 4) 4128 { 4129 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4130 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4131 __m128i c2 = 
_mm_load_si128((__m128i*)(source2 + 4 * x)); 4132 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4133 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4134 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4135 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4136 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4137 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4138 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4139 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4140 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4141 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4142 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4143 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4144 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4145 4146 c0 = _mm_avg_epu8(c0, c1); 4147 c2 = _mm_avg_epu8(c2, c3); 4148 c4 = _mm_avg_epu8(c4, c5); 4149 c6 = _mm_avg_epu8(c6, c7); 4150 c8 = _mm_avg_epu8(c8, c9); 4151 cA = _mm_avg_epu8(cA, cB); 4152 cC = _mm_avg_epu8(cC, cD); 4153 cE = _mm_avg_epu8(cE, cF); 4154 c0 = _mm_avg_epu8(c0, c2); 4155 c4 = _mm_avg_epu8(c4, c6); 4156 c8 = _mm_avg_epu8(c8, cA); 4157 cC = _mm_avg_epu8(cC, cE); 4158 c0 = _mm_avg_epu8(c0, c4); 4159 c8 = _mm_avg_epu8(c8, cC); 4160 c0 = _mm_avg_epu8(c0, c8); 4161 4162 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4163 } 4164 4165 source0 += pitch; 4166 source1 += pitch; 4167 source2 += pitch; 4168 source3 += pitch; 4169 source4 += pitch; 4170 source5 += pitch; 4171 source6 += pitch; 4172 source7 += pitch; 4173 source8 += pitch; 4174 source9 += pitch; 4175 sourceA += pitch; 4176 sourceB += pitch; 4177 sourceC += pitch; 4178 sourceD += pitch; 4179 sourceE += pitch; 4180 sourceF += pitch; 4181 } 4182 } 4183 else ASSERT(false); 4184 } 4185 else 4186 #endif 4187 { 4188 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 4189 4190 if(internal.samples == 2) 4191 { 4192 for(int y = 0; y < height; y++) 4193 { 4194 for(int x = 0; x < width; x++) 4195 { 4196 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4197 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4198 4199 c0 = AVERAGE(c0, c1); 4200 4201 *(unsigned int*)(source0 + 4 * x) = c0; 4202 } 4203 4204 source0 += pitch; 4205 source1 += pitch; 4206 } 4207 } 4208 else if(internal.samples == 4) 4209 { 4210 for(int y = 0; y < height; y++) 4211 { 4212 for(int x = 0; x < width; x++) 4213 { 4214 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4215 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4216 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4217 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4218 4219 c0 = AVERAGE(c0, c1); 4220 c2 = AVERAGE(c2, c3); 4221 c0 = AVERAGE(c0, c2); 4222 4223 *(unsigned int*)(source0 + 4 * x) = c0; 4224 } 4225 4226 source0 += pitch; 4227 source1 += pitch; 4228 source2 += pitch; 4229 source3 += pitch; 4230 } 4231 } 4232 else if(internal.samples == 8) 4233 { 4234 for(int y = 0; y < height; y++) 4235 { 4236 for(int x = 0; x < width; x++) 4237 { 4238 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4239 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4240 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4241 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4242 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4243 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4244 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4245 unsigned int c7 = 
*(unsigned int*)(source7 + 4 * x); 4246 4247 c0 = AVERAGE(c0, c1); 4248 c2 = AVERAGE(c2, c3); 4249 c4 = AVERAGE(c4, c5); 4250 c6 = AVERAGE(c6, c7); 4251 c0 = AVERAGE(c0, c2); 4252 c4 = AVERAGE(c4, c6); 4253 c0 = AVERAGE(c0, c4); 4254 4255 *(unsigned int*)(source0 + 4 * x) = c0; 4256 } 4257 4258 source0 += pitch; 4259 source1 += pitch; 4260 source2 += pitch; 4261 source3 += pitch; 4262 source4 += pitch; 4263 source5 += pitch; 4264 source6 += pitch; 4265 source7 += pitch; 4266 } 4267 } 4268 else if(internal.samples == 16) 4269 { 4270 for(int y = 0; y < height; y++) 4271 { 4272 for(int x = 0; x < width; x++) 4273 { 4274 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4275 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4276 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4277 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4278 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4279 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4280 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4281 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4282 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4283 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4284 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4285 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4286 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4287 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4288 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4289 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4290 4291 c0 = AVERAGE(c0, c1); 4292 c2 = AVERAGE(c2, c3); 4293 c4 = AVERAGE(c4, c5); 4294 c6 = AVERAGE(c6, c7); 4295 c8 = AVERAGE(c8, c9); 4296 cA = AVERAGE(cA, cB); 4297 cC = AVERAGE(cC, cD); 4298 cE = AVERAGE(cE, cF); 4299 c0 = AVERAGE(c0, c2); 4300 c4 = AVERAGE(c4, c6); 4301 c8 = AVERAGE(c8, cA); 4302 cC = AVERAGE(cC, cE); 4303 c0 = AVERAGE(c0, c4); 4304 c8 = AVERAGE(c8, cC); 4305 c0 = AVERAGE(c0, c8); 4306 4307 *(unsigned int*)(source0 + 4 * x) = c0; 4308 } 4309 4310 source0 += pitch; 4311 source1 += pitch; 4312 source2 += pitch; 4313 source3 += pitch; 4314 source4 += pitch; 4315 source5 += pitch; 4316 source6 += pitch; 4317 source7 += pitch; 4318 source8 += pitch; 4319 source9 += pitch; 4320 sourceA += pitch; 4321 sourceB += pitch; 4322 sourceC += pitch; 4323 sourceD += pitch; 4324 sourceE += pitch; 4325 sourceF += pitch; 4326 } 4327 } 4328 else ASSERT(false); 4329 4330 #undef AVERAGE 4331 } 4332 } 4333 else if(internal.format == FORMAT_G16R16) 4334 { 4335 4336 #if defined(__i386__) || defined(__x86_64__) 4337 if(CPUID::supportsSSE2() && (width % 4) == 0) 4338 { 4339 if(internal.samples == 2) 4340 { 4341 for(int y = 0; y < height; y++) 4342 { 4343 for(int x = 0; x < width; x += 4) 4344 { 4345 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4346 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4347 4348 c0 = _mm_avg_epu16(c0, c1); 4349 4350 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4351 } 4352 4353 source0 += pitch; 4354 source1 += pitch; 4355 } 4356 } 4357 else if(internal.samples == 4) 4358 { 4359 for(int y = 0; y < height; y++) 4360 { 4361 for(int x = 0; x < width; x += 4) 4362 { 4363 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4364 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4365 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4366 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4367 4368 c0 = _mm_avg_epu16(c0, c1); 4369 c2 = _mm_avg_epu16(c2, c3); 4370 c0 = _mm_avg_epu16(c0, c2); 4371 
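						// _mm_avg_epu16() computes (a + b + 1) >> 1 per 16-bit channel, so this pairwise
						// tree yields the average of the four samples with a slight round-up bias; the
						// result is stored back into sample 0's slice, which holds the resolved image.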
4372 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4373 } 4374 4375 source0 += pitch; 4376 source1 += pitch; 4377 source2 += pitch; 4378 source3 += pitch; 4379 } 4380 } 4381 else if(internal.samples == 8) 4382 { 4383 for(int y = 0; y < height; y++) 4384 { 4385 for(int x = 0; x < width; x += 4) 4386 { 4387 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4388 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4389 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4390 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4391 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4392 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4393 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4394 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4395 4396 c0 = _mm_avg_epu16(c0, c1); 4397 c2 = _mm_avg_epu16(c2, c3); 4398 c4 = _mm_avg_epu16(c4, c5); 4399 c6 = _mm_avg_epu16(c6, c7); 4400 c0 = _mm_avg_epu16(c0, c2); 4401 c4 = _mm_avg_epu16(c4, c6); 4402 c0 = _mm_avg_epu16(c0, c4); 4403 4404 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4405 } 4406 4407 source0 += pitch; 4408 source1 += pitch; 4409 source2 += pitch; 4410 source3 += pitch; 4411 source4 += pitch; 4412 source5 += pitch; 4413 source6 += pitch; 4414 source7 += pitch; 4415 } 4416 } 4417 else if(internal.samples == 16) 4418 { 4419 for(int y = 0; y < height; y++) 4420 { 4421 for(int x = 0; x < width; x += 4) 4422 { 4423 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4424 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4425 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4426 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4427 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4428 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4429 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4430 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4431 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4432 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4433 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4434 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4435 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4436 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4437 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4438 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4439 4440 c0 = _mm_avg_epu16(c0, c1); 4441 c2 = _mm_avg_epu16(c2, c3); 4442 c4 = _mm_avg_epu16(c4, c5); 4443 c6 = _mm_avg_epu16(c6, c7); 4444 c8 = _mm_avg_epu16(c8, c9); 4445 cA = _mm_avg_epu16(cA, cB); 4446 cC = _mm_avg_epu16(cC, cD); 4447 cE = _mm_avg_epu16(cE, cF); 4448 c0 = _mm_avg_epu16(c0, c2); 4449 c4 = _mm_avg_epu16(c4, c6); 4450 c8 = _mm_avg_epu16(c8, cA); 4451 cC = _mm_avg_epu16(cC, cE); 4452 c0 = _mm_avg_epu16(c0, c4); 4453 c8 = _mm_avg_epu16(c8, cC); 4454 c0 = _mm_avg_epu16(c0, c8); 4455 4456 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4457 } 4458 4459 source0 += pitch; 4460 source1 += pitch; 4461 source2 += pitch; 4462 source3 += pitch; 4463 source4 += pitch; 4464 source5 += pitch; 4465 source6 += pitch; 4466 source7 += pitch; 4467 source8 += pitch; 4468 source9 += pitch; 4469 sourceA += pitch; 4470 sourceB += pitch; 4471 sourceC += pitch; 4472 sourceD += pitch; 4473 sourceE += pitch; 4474 sourceF += pitch; 4475 } 4476 } 4477 else ASSERT(false); 4478 } 4479 else 4480 #endif 4481 { 4482 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + 
(((x) ^ (y)) & 0x00010001)) 4483 4484 if(internal.samples == 2) 4485 { 4486 for(int y = 0; y < height; y++) 4487 { 4488 for(int x = 0; x < width; x++) 4489 { 4490 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4491 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4492 4493 c0 = AVERAGE(c0, c1); 4494 4495 *(unsigned int*)(source0 + 4 * x) = c0; 4496 } 4497 4498 source0 += pitch; 4499 source1 += pitch; 4500 } 4501 } 4502 else if(internal.samples == 4) 4503 { 4504 for(int y = 0; y < height; y++) 4505 { 4506 for(int x = 0; x < width; x++) 4507 { 4508 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4509 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4510 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4511 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4512 4513 c0 = AVERAGE(c0, c1); 4514 c2 = AVERAGE(c2, c3); 4515 c0 = AVERAGE(c0, c2); 4516 4517 *(unsigned int*)(source0 + 4 * x) = c0; 4518 } 4519 4520 source0 += pitch; 4521 source1 += pitch; 4522 source2 += pitch; 4523 source3 += pitch; 4524 } 4525 } 4526 else if(internal.samples == 8) 4527 { 4528 for(int y = 0; y < height; y++) 4529 { 4530 for(int x = 0; x < width; x++) 4531 { 4532 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4533 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4534 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4535 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4536 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4537 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4538 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4539 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4540 4541 c0 = AVERAGE(c0, c1); 4542 c2 = AVERAGE(c2, c3); 4543 c4 = AVERAGE(c4, c5); 4544 c6 = AVERAGE(c6, c7); 4545 c0 = AVERAGE(c0, c2); 4546 c4 = AVERAGE(c4, c6); 4547 c0 = AVERAGE(c0, c4); 4548 4549 *(unsigned int*)(source0 + 4 * x) = c0; 4550 } 4551 4552 source0 += pitch; 4553 source1 += pitch; 4554 source2 += pitch; 4555 source3 += pitch; 4556 source4 += pitch; 4557 source5 += pitch; 4558 source6 += pitch; 4559 source7 += pitch; 4560 } 4561 } 4562 else if(internal.samples == 16) 4563 { 4564 for(int y = 0; y < height; y++) 4565 { 4566 for(int x = 0; x < width; x++) 4567 { 4568 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4569 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4570 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4571 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4572 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4573 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4574 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4575 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4576 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4577 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4578 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4579 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4580 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4581 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4582 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4583 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4584 4585 c0 = AVERAGE(c0, c1); 4586 c2 = AVERAGE(c2, c3); 4587 c4 = AVERAGE(c4, c5); 4588 c6 = AVERAGE(c6, c7); 4589 c8 = AVERAGE(c8, c9); 4590 cA = AVERAGE(cA, cB); 4591 cC = AVERAGE(cC, cD); 4592 cE = AVERAGE(cE, cF); 4593 c0 = AVERAGE(c0, c2); 4594 c4 = AVERAGE(c4, c6); 4595 c8 = AVERAGE(c8, cA); 4596 cC = AVERAGE(cC, cE); 4597 c0 = AVERAGE(c0, c4); 4598 c8 = AVERAGE(c8, cC); 4599 c0 = 
AVERAGE(c0, c8); 4600 4601 *(unsigned int*)(source0 + 4 * x) = c0; 4602 } 4603 4604 source0 += pitch; 4605 source1 += pitch; 4606 source2 += pitch; 4607 source3 += pitch; 4608 source4 += pitch; 4609 source5 += pitch; 4610 source6 += pitch; 4611 source7 += pitch; 4612 source8 += pitch; 4613 source9 += pitch; 4614 sourceA += pitch; 4615 sourceB += pitch; 4616 sourceC += pitch; 4617 sourceD += pitch; 4618 sourceE += pitch; 4619 sourceF += pitch; 4620 } 4621 } 4622 else ASSERT(false); 4623 4624 #undef AVERAGE 4625 } 4626 } 4627 else if(internal.format == FORMAT_A16B16G16R16) 4628 { 4629 #if defined(__i386__) || defined(__x86_64__) 4630 if(CPUID::supportsSSE2() && (width % 2) == 0) 4631 { 4632 if(internal.samples == 2) 4633 { 4634 for(int y = 0; y < height; y++) 4635 { 4636 for(int x = 0; x < width; x += 2) 4637 { 4638 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4639 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4640 4641 c0 = _mm_avg_epu16(c0, c1); 4642 4643 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4644 } 4645 4646 source0 += pitch; 4647 source1 += pitch; 4648 } 4649 } 4650 else if(internal.samples == 4) 4651 { 4652 for(int y = 0; y < height; y++) 4653 { 4654 for(int x = 0; x < width; x += 2) 4655 { 4656 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4657 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4658 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4659 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4660 4661 c0 = _mm_avg_epu16(c0, c1); 4662 c2 = _mm_avg_epu16(c2, c3); 4663 c0 = _mm_avg_epu16(c0, c2); 4664 4665 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4666 } 4667 4668 source0 += pitch; 4669 source1 += pitch; 4670 source2 += pitch; 4671 source3 += pitch; 4672 } 4673 } 4674 else if(internal.samples == 8) 4675 { 4676 for(int y = 0; y < height; y++) 4677 { 4678 for(int x = 0; x < width; x += 2) 4679 { 4680 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4681 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4682 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4683 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4684 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4685 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4686 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4687 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4688 4689 c0 = _mm_avg_epu16(c0, c1); 4690 c2 = _mm_avg_epu16(c2, c3); 4691 c4 = _mm_avg_epu16(c4, c5); 4692 c6 = _mm_avg_epu16(c6, c7); 4693 c0 = _mm_avg_epu16(c0, c2); 4694 c4 = _mm_avg_epu16(c4, c6); 4695 c0 = _mm_avg_epu16(c0, c4); 4696 4697 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4698 } 4699 4700 source0 += pitch; 4701 source1 += pitch; 4702 source2 += pitch; 4703 source3 += pitch; 4704 source4 += pitch; 4705 source5 += pitch; 4706 source6 += pitch; 4707 source7 += pitch; 4708 } 4709 } 4710 else if(internal.samples == 16) 4711 { 4712 for(int y = 0; y < height; y++) 4713 { 4714 for(int x = 0; x < width; x += 2) 4715 { 4716 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4717 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4718 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4719 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4720 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4721 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4722 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4723 __m128i c7 = _mm_load_si128((__m128i*)(source7 
+ 8 * x)); 4724 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4725 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4726 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4727 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4728 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4729 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4730 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4731 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4732 4733 c0 = _mm_avg_epu16(c0, c1); 4734 c2 = _mm_avg_epu16(c2, c3); 4735 c4 = _mm_avg_epu16(c4, c5); 4736 c6 = _mm_avg_epu16(c6, c7); 4737 c8 = _mm_avg_epu16(c8, c9); 4738 cA = _mm_avg_epu16(cA, cB); 4739 cC = _mm_avg_epu16(cC, cD); 4740 cE = _mm_avg_epu16(cE, cF); 4741 c0 = _mm_avg_epu16(c0, c2); 4742 c4 = _mm_avg_epu16(c4, c6); 4743 c8 = _mm_avg_epu16(c8, cA); 4744 cC = _mm_avg_epu16(cC, cE); 4745 c0 = _mm_avg_epu16(c0, c4); 4746 c8 = _mm_avg_epu16(c8, cC); 4747 c0 = _mm_avg_epu16(c0, c8); 4748 4749 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4750 } 4751 4752 source0 += pitch; 4753 source1 += pitch; 4754 source2 += pitch; 4755 source3 += pitch; 4756 source4 += pitch; 4757 source5 += pitch; 4758 source6 += pitch; 4759 source7 += pitch; 4760 source8 += pitch; 4761 source9 += pitch; 4762 sourceA += pitch; 4763 sourceB += pitch; 4764 sourceC += pitch; 4765 sourceD += pitch; 4766 sourceE += pitch; 4767 sourceF += pitch; 4768 } 4769 } 4770 else ASSERT(false); 4771 } 4772 else 4773 #endif 4774 { 4775 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4776 4777 if(internal.samples == 2) 4778 { 4779 for(int y = 0; y < height; y++) 4780 { 4781 for(int x = 0; x < 2 * width; x++) 4782 { 4783 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4784 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4785 4786 c0 = AVERAGE(c0, c1); 4787 4788 *(unsigned int*)(source0 + 4 * x) = c0; 4789 } 4790 4791 source0 += pitch; 4792 source1 += pitch; 4793 } 4794 } 4795 else if(internal.samples == 4) 4796 { 4797 for(int y = 0; y < height; y++) 4798 { 4799 for(int x = 0; x < 2 * width; x++) 4800 { 4801 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4802 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4803 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4804 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4805 4806 c0 = AVERAGE(c0, c1); 4807 c2 = AVERAGE(c2, c3); 4808 c0 = AVERAGE(c0, c2); 4809 4810 *(unsigned int*)(source0 + 4 * x) = c0; 4811 } 4812 4813 source0 += pitch; 4814 source1 += pitch; 4815 source2 += pitch; 4816 source3 += pitch; 4817 } 4818 } 4819 else if(internal.samples == 8) 4820 { 4821 for(int y = 0; y < height; y++) 4822 { 4823 for(int x = 0; x < 2 * width; x++) 4824 { 4825 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4826 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4827 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4828 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4829 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4830 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4831 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4832 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4833 4834 c0 = AVERAGE(c0, c1); 4835 c2 = AVERAGE(c2, c3); 4836 c4 = AVERAGE(c4, c5); 4837 c6 = AVERAGE(c6, c7); 4838 c0 = AVERAGE(c0, c2); 4839 c4 = AVERAGE(c4, c6); 4840 c0 = AVERAGE(c0, c4); 4841 4842 *(unsigned int*)(source0 + 4 * x) = c0; 4843 } 4844 4845 source0 += pitch; 4846 source1 += 
pitch; 4847 source2 += pitch; 4848 source3 += pitch; 4849 source4 += pitch; 4850 source5 += pitch; 4851 source6 += pitch; 4852 source7 += pitch; 4853 } 4854 } 4855 else if(internal.samples == 16) 4856 { 4857 for(int y = 0; y < height; y++) 4858 { 4859 for(int x = 0; x < 2 * width; x++) 4860 { 4861 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4862 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4863 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4864 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4865 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4866 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4867 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4868 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4869 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4870 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4871 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4872 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4873 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4874 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4875 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4876 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4877 4878 c0 = AVERAGE(c0, c1); 4879 c2 = AVERAGE(c2, c3); 4880 c4 = AVERAGE(c4, c5); 4881 c6 = AVERAGE(c6, c7); 4882 c8 = AVERAGE(c8, c9); 4883 cA = AVERAGE(cA, cB); 4884 cC = AVERAGE(cC, cD); 4885 cE = AVERAGE(cE, cF); 4886 c0 = AVERAGE(c0, c2); 4887 c4 = AVERAGE(c4, c6); 4888 c8 = AVERAGE(c8, cA); 4889 cC = AVERAGE(cC, cE); 4890 c0 = AVERAGE(c0, c4); 4891 c8 = AVERAGE(c8, cC); 4892 c0 = AVERAGE(c0, c8); 4893 4894 *(unsigned int*)(source0 + 4 * x) = c0; 4895 } 4896 4897 source0 += pitch; 4898 source1 += pitch; 4899 source2 += pitch; 4900 source3 += pitch; 4901 source4 += pitch; 4902 source5 += pitch; 4903 source6 += pitch; 4904 source7 += pitch; 4905 source8 += pitch; 4906 source9 += pitch; 4907 sourceA += pitch; 4908 sourceB += pitch; 4909 sourceC += pitch; 4910 sourceD += pitch; 4911 sourceE += pitch; 4912 sourceF += pitch; 4913 } 4914 } 4915 else ASSERT(false); 4916 4917 #undef AVERAGE 4918 } 4919 } 4920 else if(internal.format == FORMAT_R32F) 4921 { 4922 #if defined(__i386__) || defined(__x86_64__) 4923 if(CPUID::supportsSSE() && (width % 4) == 0) 4924 { 4925 if(internal.samples == 2) 4926 { 4927 for(int y = 0; y < height; y++) 4928 { 4929 for(int x = 0; x < width; x += 4) 4930 { 4931 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4932 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4933 4934 c0 = _mm_add_ps(c0, c1); 4935 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4936 4937 _mm_store_ps((float*)(source0 + 4 * x), c0); 4938 } 4939 4940 source0 += pitch; 4941 source1 += pitch; 4942 } 4943 } 4944 else if(internal.samples == 4) 4945 { 4946 for(int y = 0; y < height; y++) 4947 { 4948 for(int x = 0; x < width; x += 4) 4949 { 4950 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4951 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4952 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4953 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4954 4955 c0 = _mm_add_ps(c0, c1); 4956 c2 = _mm_add_ps(c2, c3); 4957 c0 = _mm_add_ps(c0, c2); 4958 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4959 4960 _mm_store_ps((float*)(source0 + 4 * x), c0); 4961 } 4962 4963 source0 += pitch; 4964 source1 += pitch; 4965 source2 += pitch; 4966 source3 += pitch; 4967 } 4968 } 4969 else if(internal.samples == 8) 4970 { 4971 for(int y = 0; y < height; y++) 4972 { 4973 for(int x = 0; x < width; 
x += 4) 4974 { 4975 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4976 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4977 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4978 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4979 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4980 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4981 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4982 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4983 4984 c0 = _mm_add_ps(c0, c1); 4985 c2 = _mm_add_ps(c2, c3); 4986 c4 = _mm_add_ps(c4, c5); 4987 c6 = _mm_add_ps(c6, c7); 4988 c0 = _mm_add_ps(c0, c2); 4989 c4 = _mm_add_ps(c4, c6); 4990 c0 = _mm_add_ps(c0, c4); 4991 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4992 4993 _mm_store_ps((float*)(source0 + 4 * x), c0); 4994 } 4995 4996 source0 += pitch; 4997 source1 += pitch; 4998 source2 += pitch; 4999 source3 += pitch; 5000 source4 += pitch; 5001 source5 += pitch; 5002 source6 += pitch; 5003 source7 += pitch; 5004 } 5005 } 5006 else if(internal.samples == 16) 5007 { 5008 for(int y = 0; y < height; y++) 5009 { 5010 for(int x = 0; x < width; x += 4) 5011 { 5012 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 5013 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 5014 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 5015 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 5016 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 5017 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 5018 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 5019 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 5020 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 5021 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 5022 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x)); 5023 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 5024 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 5025 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 5026 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 5027 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 5028 5029 c0 = _mm_add_ps(c0, c1); 5030 c2 = _mm_add_ps(c2, c3); 5031 c4 = _mm_add_ps(c4, c5); 5032 c6 = _mm_add_ps(c6, c7); 5033 c8 = _mm_add_ps(c8, c9); 5034 cA = _mm_add_ps(cA, cB); 5035 cC = _mm_add_ps(cC, cD); 5036 cE = _mm_add_ps(cE, cF); 5037 c0 = _mm_add_ps(c0, c2); 5038 c4 = _mm_add_ps(c4, c6); 5039 c8 = _mm_add_ps(c8, cA); 5040 cC = _mm_add_ps(cC, cE); 5041 c0 = _mm_add_ps(c0, c4); 5042 c8 = _mm_add_ps(c8, cC); 5043 c0 = _mm_add_ps(c0, c8); 5044 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5045 5046 _mm_store_ps((float*)(source0 + 4 * x), c0); 5047 } 5048 5049 source0 += pitch; 5050 source1 += pitch; 5051 source2 += pitch; 5052 source3 += pitch; 5053 source4 += pitch; 5054 source5 += pitch; 5055 source6 += pitch; 5056 source7 += pitch; 5057 source8 += pitch; 5058 source9 += pitch; 5059 sourceA += pitch; 5060 sourceB += pitch; 5061 sourceC += pitch; 5062 sourceD += pitch; 5063 sourceE += pitch; 5064 sourceF += pitch; 5065 } 5066 } 5067 else ASSERT(false); 5068 } 5069 else 5070 #endif 5071 { 5072 if(internal.samples == 2) 5073 { 5074 for(int y = 0; y < height; y++) 5075 { 5076 for(int x = 0; x < width; x++) 5077 { 5078 float c0 = *(float*)(source0 + 4 * x); 5079 float c1 = *(float*)(source1 + 4 * x); 5080 5081 c0 = c0 + c1; 5082 c0 *= 1.0f / 2.0f; 5083 5084 *(float*)(source0 + 4 * x) = c0; 5085 } 5086 5087 source0 += pitch; 5088 source1 += pitch; 5089 } 5090 } 5091 else if(internal.samples == 4) 5092 { 5093 for(int y = 0; y < height; y++) 5094 { 5095 for(int x = 0; x < 
width; x++) 5096 { 5097 float c0 = *(float*)(source0 + 4 * x); 5098 float c1 = *(float*)(source1 + 4 * x); 5099 float c2 = *(float*)(source2 + 4 * x); 5100 float c3 = *(float*)(source3 + 4 * x); 5101 5102 c0 = c0 + c1; 5103 c2 = c2 + c3; 5104 c0 = c0 + c2; 5105 c0 *= 1.0f / 4.0f; 5106 5107 *(float*)(source0 + 4 * x) = c0; 5108 } 5109 5110 source0 += pitch; 5111 source1 += pitch; 5112 source2 += pitch; 5113 source3 += pitch; 5114 } 5115 } 5116 else if(internal.samples == 8) 5117 { 5118 for(int y = 0; y < height; y++) 5119 { 5120 for(int x = 0; x < width; x++) 5121 { 5122 float c0 = *(float*)(source0 + 4 * x); 5123 float c1 = *(float*)(source1 + 4 * x); 5124 float c2 = *(float*)(source2 + 4 * x); 5125 float c3 = *(float*)(source3 + 4 * x); 5126 float c4 = *(float*)(source4 + 4 * x); 5127 float c5 = *(float*)(source5 + 4 * x); 5128 float c6 = *(float*)(source6 + 4 * x); 5129 float c7 = *(float*)(source7 + 4 * x); 5130 5131 c0 = c0 + c1; 5132 c2 = c2 + c3; 5133 c4 = c4 + c5; 5134 c6 = c6 + c7; 5135 c0 = c0 + c2; 5136 c4 = c4 + c6; 5137 c0 = c0 + c4; 5138 c0 *= 1.0f / 8.0f; 5139 5140 *(float*)(source0 + 4 * x) = c0; 5141 } 5142 5143 source0 += pitch; 5144 source1 += pitch; 5145 source2 += pitch; 5146 source3 += pitch; 5147 source4 += pitch; 5148 source5 += pitch; 5149 source6 += pitch; 5150 source7 += pitch; 5151 } 5152 } 5153 else if(internal.samples == 16) 5154 { 5155 for(int y = 0; y < height; y++) 5156 { 5157 for(int x = 0; x < width; x++) 5158 { 5159 float c0 = *(float*)(source0 + 4 * x); 5160 float c1 = *(float*)(source1 + 4 * x); 5161 float c2 = *(float*)(source2 + 4 * x); 5162 float c3 = *(float*)(source3 + 4 * x); 5163 float c4 = *(float*)(source4 + 4 * x); 5164 float c5 = *(float*)(source5 + 4 * x); 5165 float c6 = *(float*)(source6 + 4 * x); 5166 float c7 = *(float*)(source7 + 4 * x); 5167 float c8 = *(float*)(source8 + 4 * x); 5168 float c9 = *(float*)(source9 + 4 * x); 5169 float cA = *(float*)(sourceA + 4 * x); 5170 float cB = *(float*)(sourceB + 4 * x); 5171 float cC = *(float*)(sourceC + 4 * x); 5172 float cD = *(float*)(sourceD + 4 * x); 5173 float cE = *(float*)(sourceE + 4 * x); 5174 float cF = *(float*)(sourceF + 4 * x); 5175 5176 c0 = c0 + c1; 5177 c2 = c2 + c3; 5178 c4 = c4 + c5; 5179 c6 = c6 + c7; 5180 c8 = c8 + c9; 5181 cA = cA + cB; 5182 cC = cC + cD; 5183 cE = cE + cF; 5184 c0 = c0 + c2; 5185 c4 = c4 + c6; 5186 c8 = c8 + cA; 5187 cC = cC + cE; 5188 c0 = c0 + c4; 5189 c8 = c8 + cC; 5190 c0 = c0 + c8; 5191 c0 *= 1.0f / 16.0f; 5192 5193 *(float*)(source0 + 4 * x) = c0; 5194 } 5195 5196 source0 += pitch; 5197 source1 += pitch; 5198 source2 += pitch; 5199 source3 += pitch; 5200 source4 += pitch; 5201 source5 += pitch; 5202 source6 += pitch; 5203 source7 += pitch; 5204 source8 += pitch; 5205 source9 += pitch; 5206 sourceA += pitch; 5207 sourceB += pitch; 5208 sourceC += pitch; 5209 sourceD += pitch; 5210 sourceE += pitch; 5211 sourceF += pitch; 5212 } 5213 } 5214 else ASSERT(false); 5215 } 5216 } 5217 else if(internal.format == FORMAT_G32R32F) 5218 { 5219 #if defined(__i386__) || defined(__x86_64__) 5220 if(CPUID::supportsSSE() && (width % 2) == 0) 5221 { 5222 if(internal.samples == 2) 5223 { 5224 for(int y = 0; y < height; y++) 5225 { 5226 for(int x = 0; x < width; x += 2) 5227 { 5228 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5229 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5230 5231 c0 = _mm_add_ps(c0, c1); 5232 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5233 5234 _mm_store_ps((float*)(source0 + 8 * x), c0); 5235 } 5236 5237 source0 += pitch; 5238 
source1 += pitch; 5239 } 5240 } 5241 else if(internal.samples == 4) 5242 { 5243 for(int y = 0; y < height; y++) 5244 { 5245 for(int x = 0; x < width; x += 2) 5246 { 5247 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5248 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5249 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5250 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5251 5252 c0 = _mm_add_ps(c0, c1); 5253 c2 = _mm_add_ps(c2, c3); 5254 c0 = _mm_add_ps(c0, c2); 5255 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5256 5257 _mm_store_ps((float*)(source0 + 8 * x), c0); 5258 } 5259 5260 source0 += pitch; 5261 source1 += pitch; 5262 source2 += pitch; 5263 source3 += pitch; 5264 } 5265 } 5266 else if(internal.samples == 8) 5267 { 5268 for(int y = 0; y < height; y++) 5269 { 5270 for(int x = 0; x < width; x += 2) 5271 { 5272 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5273 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5274 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5275 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5276 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5277 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5278 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5279 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5280 5281 c0 = _mm_add_ps(c0, c1); 5282 c2 = _mm_add_ps(c2, c3); 5283 c4 = _mm_add_ps(c4, c5); 5284 c6 = _mm_add_ps(c6, c7); 5285 c0 = _mm_add_ps(c0, c2); 5286 c4 = _mm_add_ps(c4, c6); 5287 c0 = _mm_add_ps(c0, c4); 5288 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5289 5290 _mm_store_ps((float*)(source0 + 8 * x), c0); 5291 } 5292 5293 source0 += pitch; 5294 source1 += pitch; 5295 source2 += pitch; 5296 source3 += pitch; 5297 source4 += pitch; 5298 source5 += pitch; 5299 source6 += pitch; 5300 source7 += pitch; 5301 } 5302 } 5303 else if(internal.samples == 16) 5304 { 5305 for(int y = 0; y < height; y++) 5306 { 5307 for(int x = 0; x < width; x += 2) 5308 { 5309 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5310 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5311 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5312 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5313 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5314 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5315 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5316 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5317 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x)); 5318 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 5319 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 5320 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x)); 5321 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 5322 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 5323 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 5324 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 5325 5326 c0 = _mm_add_ps(c0, c1); 5327 c2 = _mm_add_ps(c2, c3); 5328 c4 = _mm_add_ps(c4, c5); 5329 c6 = _mm_add_ps(c6, c7); 5330 c8 = _mm_add_ps(c8, c9); 5331 cA = _mm_add_ps(cA, cB); 5332 cC = _mm_add_ps(cC, cD); 5333 cE = _mm_add_ps(cE, cF); 5334 c0 = _mm_add_ps(c0, c2); 5335 c4 = _mm_add_ps(c4, c6); 5336 c8 = _mm_add_ps(c8, cA); 5337 cC = _mm_add_ps(cC, cE); 5338 c0 = _mm_add_ps(c0, c4); 5339 c8 = _mm_add_ps(c8, cC); 5340 c0 = _mm_add_ps(c0, c8); 5341 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5342 5343 _mm_store_ps((float*)(source0 + 8 * x), c0); 5344 } 5345 5346 source0 += pitch; 5347 source1 += pitch; 5348 source2 += pitch; 5349 source3 += pitch; 5350 
source4 += pitch; 5351 source5 += pitch; 5352 source6 += pitch; 5353 source7 += pitch; 5354 source8 += pitch; 5355 source9 += pitch; 5356 sourceA += pitch; 5357 sourceB += pitch; 5358 sourceC += pitch; 5359 sourceD += pitch; 5360 sourceE += pitch; 5361 sourceF += pitch; 5362 } 5363 } 5364 else ASSERT(false); 5365 } 5366 else 5367 #endif 5368 { 5369 if(internal.samples == 2) 5370 { 5371 for(int y = 0; y < height; y++) 5372 { 5373 for(int x = 0; x < 2 * width; x++) 5374 { 5375 float c0 = *(float*)(source0 + 4 * x); 5376 float c1 = *(float*)(source1 + 4 * x); 5377 5378 c0 = c0 + c1; 5379 c0 *= 1.0f / 2.0f; 5380 5381 *(float*)(source0 + 4 * x) = c0; 5382 } 5383 5384 source0 += pitch; 5385 source1 += pitch; 5386 } 5387 } 5388 else if(internal.samples == 4) 5389 { 5390 for(int y = 0; y < height; y++) 5391 { 5392 for(int x = 0; x < 2 * width; x++) 5393 { 5394 float c0 = *(float*)(source0 + 4 * x); 5395 float c1 = *(float*)(source1 + 4 * x); 5396 float c2 = *(float*)(source2 + 4 * x); 5397 float c3 = *(float*)(source3 + 4 * x); 5398 5399 c0 = c0 + c1; 5400 c2 = c2 + c3; 5401 c0 = c0 + c2; 5402 c0 *= 1.0f / 4.0f; 5403 5404 *(float*)(source0 + 4 * x) = c0; 5405 } 5406 5407 source0 += pitch; 5408 source1 += pitch; 5409 source2 += pitch; 5410 source3 += pitch; 5411 } 5412 } 5413 else if(internal.samples == 8) 5414 { 5415 for(int y = 0; y < height; y++) 5416 { 5417 for(int x = 0; x < 2 * width; x++) 5418 { 5419 float c0 = *(float*)(source0 + 4 * x); 5420 float c1 = *(float*)(source1 + 4 * x); 5421 float c2 = *(float*)(source2 + 4 * x); 5422 float c3 = *(float*)(source3 + 4 * x); 5423 float c4 = *(float*)(source4 + 4 * x); 5424 float c5 = *(float*)(source5 + 4 * x); 5425 float c6 = *(float*)(source6 + 4 * x); 5426 float c7 = *(float*)(source7 + 4 * x); 5427 5428 c0 = c0 + c1; 5429 c2 = c2 + c3; 5430 c4 = c4 + c5; 5431 c6 = c6 + c7; 5432 c0 = c0 + c2; 5433 c4 = c4 + c6; 5434 c0 = c0 + c4; 5435 c0 *= 1.0f / 8.0f; 5436 5437 *(float*)(source0 + 4 * x) = c0; 5438 } 5439 5440 source0 += pitch; 5441 source1 += pitch; 5442 source2 += pitch; 5443 source3 += pitch; 5444 source4 += pitch; 5445 source5 += pitch; 5446 source6 += pitch; 5447 source7 += pitch; 5448 } 5449 } 5450 else if(internal.samples == 16) 5451 { 5452 for(int y = 0; y < height; y++) 5453 { 5454 for(int x = 0; x < 2 * width; x++) 5455 { 5456 float c0 = *(float*)(source0 + 4 * x); 5457 float c1 = *(float*)(source1 + 4 * x); 5458 float c2 = *(float*)(source2 + 4 * x); 5459 float c3 = *(float*)(source3 + 4 * x); 5460 float c4 = *(float*)(source4 + 4 * x); 5461 float c5 = *(float*)(source5 + 4 * x); 5462 float c6 = *(float*)(source6 + 4 * x); 5463 float c7 = *(float*)(source7 + 4 * x); 5464 float c8 = *(float*)(source8 + 4 * x); 5465 float c9 = *(float*)(source9 + 4 * x); 5466 float cA = *(float*)(sourceA + 4 * x); 5467 float cB = *(float*)(sourceB + 4 * x); 5468 float cC = *(float*)(sourceC + 4 * x); 5469 float cD = *(float*)(sourceD + 4 * x); 5470 float cE = *(float*)(sourceE + 4 * x); 5471 float cF = *(float*)(sourceF + 4 * x); 5472 5473 c0 = c0 + c1; 5474 c2 = c2 + c3; 5475 c4 = c4 + c5; 5476 c6 = c6 + c7; 5477 c8 = c8 + c9; 5478 cA = cA + cB; 5479 cC = cC + cD; 5480 cE = cE + cF; 5481 c0 = c0 + c2; 5482 c4 = c4 + c6; 5483 c8 = c8 + cA; 5484 cC = cC + cE; 5485 c0 = c0 + c4; 5486 c8 = c8 + cC; 5487 c0 = c0 + c8; 5488 c0 *= 1.0f / 16.0f; 5489 5490 *(float*)(source0 + 4 * x) = c0; 5491 } 5492 5493 source0 += pitch; 5494 source1 += pitch; 5495 source2 += pitch; 5496 source3 += pitch; 5497 source4 += pitch; 5498 source5 += pitch; 5499 source6 += pitch; 
5500 source7 += pitch; 5501 source8 += pitch; 5502 source9 += pitch; 5503 sourceA += pitch; 5504 sourceB += pitch; 5505 sourceC += pitch; 5506 sourceD += pitch; 5507 sourceE += pitch; 5508 sourceF += pitch; 5509 } 5510 } 5511 else ASSERT(false); 5512 } 5513 } 5514 else if(internal.format == FORMAT_A32B32G32R32F || 5515 internal.format == FORMAT_X32B32G32R32F || 5516 internal.format == FORMAT_X32B32G32R32F_UNSIGNED) 5517 { 5518 #if defined(__i386__) || defined(__x86_64__) 5519 if(CPUID::supportsSSE()) 5520 { 5521 if(internal.samples == 2) 5522 { 5523 for(int y = 0; y < height; y++) 5524 { 5525 for(int x = 0; x < width; x++) 5526 { 5527 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5528 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5529 5530 c0 = _mm_add_ps(c0, c1); 5531 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5532 5533 _mm_store_ps((float*)(source0 + 16 * x), c0); 5534 } 5535 5536 source0 += pitch; 5537 source1 += pitch; 5538 } 5539 } 5540 else if(internal.samples == 4) 5541 { 5542 for(int y = 0; y < height; y++) 5543 { 5544 for(int x = 0; x < width; x++) 5545 { 5546 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5547 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5548 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5549 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5550 5551 c0 = _mm_add_ps(c0, c1); 5552 c2 = _mm_add_ps(c2, c3); 5553 c0 = _mm_add_ps(c0, c2); 5554 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5555 5556 _mm_store_ps((float*)(source0 + 16 * x), c0); 5557 } 5558 5559 source0 += pitch; 5560 source1 += pitch; 5561 source2 += pitch; 5562 source3 += pitch; 5563 } 5564 } 5565 else if(internal.samples == 8) 5566 { 5567 for(int y = 0; y < height; y++) 5568 { 5569 for(int x = 0; x < width; x++) 5570 { 5571 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5572 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5573 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5574 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5575 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5576 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5577 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5578 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5579 5580 c0 = _mm_add_ps(c0, c1); 5581 c2 = _mm_add_ps(c2, c3); 5582 c4 = _mm_add_ps(c4, c5); 5583 c6 = _mm_add_ps(c6, c7); 5584 c0 = _mm_add_ps(c0, c2); 5585 c4 = _mm_add_ps(c4, c6); 5586 c0 = _mm_add_ps(c0, c4); 5587 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5588 5589 _mm_store_ps((float*)(source0 + 16 * x), c0); 5590 } 5591 5592 source0 += pitch; 5593 source1 += pitch; 5594 source2 += pitch; 5595 source3 += pitch; 5596 source4 += pitch; 5597 source5 += pitch; 5598 source6 += pitch; 5599 source7 += pitch; 5600 } 5601 } 5602 else if(internal.samples == 16) 5603 { 5604 for(int y = 0; y < height; y++) 5605 { 5606 for(int x = 0; x < width; x++) 5607 { 5608 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5609 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5610 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5611 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5612 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5613 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5614 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5615 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5616 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5617 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 5618 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5619 
							__m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
							__m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
							__m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
							__m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
							__m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));

							c0 = _mm_add_ps(c0, c1);
							c2 = _mm_add_ps(c2, c3);
							c4 = _mm_add_ps(c4, c5);
							c6 = _mm_add_ps(c6, c7);
							c8 = _mm_add_ps(c8, c9);
							cA = _mm_add_ps(cA, cB);
							cC = _mm_add_ps(cC, cD);
							cE = _mm_add_ps(cE, cF);
							c0 = _mm_add_ps(c0, c2);
							c4 = _mm_add_ps(c4, c6);
							c8 = _mm_add_ps(c8, cA);
							cC = _mm_add_ps(cC, cE);
							c0 = _mm_add_ps(c0, c4);
							c8 = _mm_add_ps(c8, cC);
							c0 = _mm_add_ps(c0, c8);
							c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));

							_mm_store_ps((float*)(source0 + 16 * x), c0);
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
						source8 += pitch;
						source9 += pitch;
						sourceA += pitch;
						sourceB += pitch;
						sourceC += pitch;
						sourceD += pitch;
						sourceE += pitch;
						sourceF += pitch;
					}
				}
				else ASSERT(false);
			}
			else
		#endif
			{
				// Fallback: average every sample of each 32-bit component
				// (four components per pixel, hence the 4 * width bound).
				if(internal.samples == 2)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < 4 * width; x++)
						{
							float c0 = *(float*)(source0 + 4 * x);
							float c1 = *(float*)(source1 + 4 * x);

							c0 = c0 + c1;
							c0 *= 1.0f / 2.0f;

							*(float*)(source0 + 4 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
					}
				}
				else if(internal.samples == 4)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < 4 * width; x++)
						{
							float c0 = *(float*)(source0 + 4 * x);
							float c1 = *(float*)(source1 + 4 * x);
							float c2 = *(float*)(source2 + 4 * x);
							float c3 = *(float*)(source3 + 4 * x);

							c0 = c0 + c1;
							c2 = c2 + c3;
							c0 = c0 + c2;
							c0 *= 1.0f / 4.0f;

							*(float*)(source0 + 4 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
					}
				}
				else if(internal.samples == 8)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < 4 * width; x++)
						{
							float c0 = *(float*)(source0 + 4 * x);
							float c1 = *(float*)(source1 + 4 * x);
							float c2 = *(float*)(source2 + 4 * x);
							float c3 = *(float*)(source3 + 4 * x);
							float c4 = *(float*)(source4 + 4 * x);
							float c5 = *(float*)(source5 + 4 * x);
							float c6 = *(float*)(source6 + 4 * x);
							float c7 = *(float*)(source7 + 4 * x);

							c0 = c0 + c1;
							c2 = c2 + c3;
							c4 = c4 + c5;
							c6 = c6 + c7;
							c0 = c0 + c2;
							c4 = c4 + c6;
							c0 = c0 + c4;
							c0 *= 1.0f / 8.0f;

							*(float*)(source0 + 4 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
					}
				}
				else if(internal.samples == 16)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < 4 * width; x++)
						{
							float c0 = *(float*)(source0 + 4 * x);
							float c1 = *(float*)(source1 + 4 * x);
							float c2 = *(float*)(source2 + 4 * x);
							float c3 = *(float*)(source3 + 4 * x);
							float c4 = *(float*)(source4 + 4 * x);
							float c5 = *(float*)(source5 + 4 * x);
							float c6 = *(float*)(source6 + 4 * x);
							float c7 = *(float*)(source7 + 4 * x);
							float c8 = *(float*)(source8 + 4 * x);
							float c9 = *(float*)(source9 + 4 * x);
							float cA = *(float*)(sourceA + 4 * x);
							float cB = *(float*)(sourceB + 4 * x);
							float cC = *(float*)(sourceC + 4 * x);
							float cD = *(float*)(sourceD + 4 * x);
							float cE = *(float*)(sourceE + 4 * x);
							float cF = *(float*)(sourceF + 4 * x);

							c0 = c0 + c1;
							c2 = c2 + c3;
							c4 = c4 + c5;
							c6 = c6 + c7;
							c8 = c8 + c9;
							cA = cA + cB;
							cC = cC + cD;
							cE = cE + cF;
							c0 = c0 + c2;
							c4 = c4 + c6;
							c8 = c8 + cA;
							cC = cC + cE;
							c0 = c0 + c4;
							c8 = c8 + cC;
							c0 = c0 + c8;
							c0 *= 1.0f / 16.0f;

							*(float*)(source0 + 4 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
						source8 += pitch;
						source9 += pitch;
						sourceA += pitch;
						sourceB += pitch;
						sourceC += pitch;
						sourceD += pitch;
						sourceE += pitch;
						sourceF += pitch;
					}
				}
				else ASSERT(false);
			}
		}
		// Resolve packed 16-bit 5:6:5 color by averaging each channel. The SSE2 path masks
		// out red/blue (0xF81F) and green (0x07E0) separately so the packed samples can be
		// averaged without unpacking; it requires the width to be a multiple of 8 pixels.
		else if(internal.format == FORMAT_R5G6B5)
		{
		#if defined(__i386__) || defined(__x86_64__)
			if(CPUID::supportsSSE2() && (width % 8) == 0)
			{
				if(internal.samples == 2)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x += 8)
						{
							__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
							__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));

							static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
							static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
							__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
							__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
							__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));

							c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
							c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							// Green straddles the byte boundary, so average it with 16-bit precision.
							c1 = _mm_avg_epu16(c0__g_, c1__g_);
							c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
							c0 = _mm_or_si128(c0, c1);

							_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
						}

						source0 += pitch;
						source1 += pitch;
					}
				}
				else if(internal.samples == 4)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x += 8)
						{
							__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
							__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
							__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
							__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));

							static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
							static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
							__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
							__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
							__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
							__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
							__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
							__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
							__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));

							c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
							c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
							c0 = _mm_avg_epu8(c0, c2);
							c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							c1 = _mm_avg_epu16(c0__g_, c1__g_);
							c3 = _mm_avg_epu16(c2__g_, c3__g_);
							c1 = _mm_avg_epu16(c1, c3);
							c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
							c0 = _mm_or_si128(c0, c1);

							_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
					}
				}
				else if(internal.samples == 8)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x += 8)
						{
							__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
							__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
							__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
							__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
							__m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
							__m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
							__m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
							__m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));

							static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
							static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
							__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
							__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
							__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
							__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
							__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
							__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
							__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
							__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
							__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
							__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
							__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
							__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
							__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
							__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
							__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));

							c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
							c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
							c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
							c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
							c0 = _mm_avg_epu8(c0, c2);
							c4 = _mm_avg_epu8(c4, c6);
							c0 = _mm_avg_epu8(c0, c4);
							c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							c1 = _mm_avg_epu16(c0__g_, c1__g_);
							c3 = _mm_avg_epu16(c2__g_, c3__g_);
							c5 = _mm_avg_epu16(c4__g_, c5__g_);
							c7 = _mm_avg_epu16(c6__g_, c7__g_);
							c1 = _mm_avg_epu16(c1, c3);
							c5 = _mm_avg_epu16(c5, c7);
							c1 = _mm_avg_epu16(c1, c5);
							c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
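							// Merge the averaged green field back into the averaged red/blue field
							// to form the resolved 5:6:5 pixels.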
							c0 = _mm_or_si128(c0, c1);

							_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
					}
				}
				else if(internal.samples == 16)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x += 8)
						{
							__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
							__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
							__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
							__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
							__m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
							__m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
							__m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
							__m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
							__m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
							__m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
							__m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
							__m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
							__m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
							__m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
							__m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
							__m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));

							static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
							static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
							__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
							__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
							__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
							__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
							__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
							__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
							__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
							__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
							__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
							__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
							__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
							__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
							__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
							__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
							__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
							__m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
							__m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
							__m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
							__m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
							__m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
							__m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
							__m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
							__m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
							__m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
							__m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
							__m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
							__m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
							__m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
							__m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
							__m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
							__m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));

							c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
							c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
							c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
							c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
							c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
							cA = _mm_avg_epu8(cA_r_b, cB_r_b);
							cC = _mm_avg_epu8(cC_r_b, cD_r_b);
							cE = _mm_avg_epu8(cE_r_b, cF_r_b);
							c0 = _mm_avg_epu8(c0, c2);
							c4 = _mm_avg_epu8(c4, c6);
							c8 = _mm_avg_epu8(c8, cA);
							cC = _mm_avg_epu8(cC, cE);
							c0 = _mm_avg_epu8(c0, c4);
							c8 = _mm_avg_epu8(c8, cC);
							c0 = _mm_avg_epu8(c0, c8);
							c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
							c1 = _mm_avg_epu16(c0__g_, c1__g_);
							c3 = _mm_avg_epu16(c2__g_, c3__g_);
							c5 = _mm_avg_epu16(c4__g_, c5__g_);
							c7 = _mm_avg_epu16(c6__g_, c7__g_);
							c9 = _mm_avg_epu16(c8__g_, c9__g_);
							cB = _mm_avg_epu16(cA__g_, cB__g_);
							cD = _mm_avg_epu16(cC__g_, cD__g_);
							cF = _mm_avg_epu16(cE__g_, cF__g_);
							c1 = _mm_avg_epu16(c1, c3);
							c5 = _mm_avg_epu16(c5, c7);
							c9 = _mm_avg_epu16(c9, cB);
							cD = _mm_avg_epu16(cD, cF);
							c1 = _mm_avg_epu16(c1, c5);
							c9 = _mm_avg_epu16(c9, cD);
							c1 = _mm_avg_epu16(c1, c9);
							c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
							c0 = _mm_or_si128(c0, c1);

							_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
						source8 += pitch;
						source9 += pitch;
						sourceA += pitch;
						sourceB += pitch;
						sourceC += pitch;
						sourceD += pitch;
						sourceE += pitch;
						sourceF += pitch;
					}
				}
				else ASSERT(false);
			}
			else
		#endif
			{
				// Averages two packed 5:6:5 pixels per channel without unpacking:
				// (x & y) + ((x ^ y) >> 1) is the overflow-free average; the 0x7BEF mask keeps
				// shifted bits from crossing channel boundaries, and adding (x ^ y) & 0x0821
				// rounds each channel up, matching _mm_avg_epu8/_mm_avg_epu16.
				#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))

				if(internal.samples == 2)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);

							c0 = AVERAGE(c0, c1);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
					}
				}
				else if(internal.samples == 4)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c0 = AVERAGE(c0, c2);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
					}
				}
				else if(internal.samples == 8)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
							unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
							unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
							unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
							unsigned short c7 = *(unsigned short*)(source7 + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c4 = AVERAGE(c4, c5);
							c6 = AVERAGE(c6, c7);
							c0 = AVERAGE(c0, c2);
							c4 = AVERAGE(c4, c6);
							c0 = AVERAGE(c0, c4);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
					}
				}
				else if(internal.samples == 16)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
							unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
							unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
							unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
							unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
							unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
							unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
							unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
							unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
							unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
							unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
							unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
							unsigned short cF = *(unsigned short*)(sourceF + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c4 = AVERAGE(c4, c5);
							c6 = AVERAGE(c6, c7);
							c8 = AVERAGE(c8, c9);
							cA = AVERAGE(cA, cB);
							cC = AVERAGE(cC, cD);
							cE = AVERAGE(cE, cF);
							c0 = AVERAGE(c0, c2);
							c4 = AVERAGE(c4, c6);
							c8 = AVERAGE(c8, cA);
							cC = AVERAGE(cC, cE);
							c0 = AVERAGE(c0, c4);
							c8 = AVERAGE(c8, cC);
							c0 = AVERAGE(c0, c8);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
						source8 += pitch;
						source9 += pitch;
						sourceA += pitch;
						sourceB += pitch;
						sourceC += pitch;
						sourceD += pitch;
						sourceE += pitch;
						sourceF += pitch;
					}
				}
				else ASSERT(false);

				#undef AVERAGE
			}
		}
		else
		{
			// UNIMPLEMENTED();
		}
	}
}