// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "Surface.hpp"

#include "Color.hpp"
#include "Context.hpp"
#include "ETC_Decoder.hpp"
#include "Renderer.hpp"
#include "Common/Half.hpp"
#include "Common/Memory.hpp"
#include "Common/CPUID.hpp"
#include "Common/Resource.hpp"
#include "Common/Debug.hpp"
#include "Reactor/Reactor.hpp"

#if defined(__i386__) || defined(__x86_64__)
	#include <xmmintrin.h>
	#include <emmintrin.h>
#endif

#undef min
#undef max

namespace sw
{
	extern bool quadLayoutEnabled;
	extern bool complementaryDepthBuffer;
	extern TranscendentalPrecision logPrecision;

	unsigned int *Surface::palette = 0;
	unsigned int Surface::paletteID = 0;

	void Surface::Buffer::write(int x, int y, int z, const Color<float> &color)
	{
		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;

		for(int i = 0; i < samples; i++)
		{
			write(element, color);
			element += sliceB;
		}
	}

	void Surface::Buffer::write(int x, int y, const Color<float> &color)
	{
		byte *element = (byte*)buffer + (x + border) * bytes + (y + border) * pitchB;

		for(int i = 0; i < samples; i++)
		{
			write(element, color);
			element += sliceB;
		}
	}

	inline void Surface::Buffer::write(void *element, const Color<float> &color)
	{
		float r = color.r;
		float g = color.g;
		float b = color.b;
		float a = color.a;

		if(isSRGBformat(format))
		{
			r = linearToSRGB(r);
			g = linearToSRGB(g);
			b = linearToSRGB(b);
		}

		switch(format)
		{
		case FORMAT_A8:
			*(unsigned char*)element = unorm<8>(a);
			break;
		case FORMAT_R8_SNORM:
			*(char*)element = snorm<8>(r);
			break;
		case FORMAT_R8:
			*(unsigned char*)element = unorm<8>(r);
			break;
		case FORMAT_R8I:
			*(char*)element = scast<8>(r);
			break;
		case FORMAT_R8UI:
			*(unsigned char*)element = ucast<8>(r);
			break;
		case FORMAT_R16I:
			*(short*)element = scast<16>(r);
			break;
		case FORMAT_R16UI:
			*(unsigned short*)element = ucast<16>(r);
			break;
		case FORMAT_R32I:
			*(int*)element = static_cast<int>(r);
			break;
		case FORMAT_R32UI:
			*(unsigned int*)element = static_cast<unsigned int>(r);
			break;
		case FORMAT_R3G3B2:
			*(unsigned char*)element = (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
			break;
		case FORMAT_A8R3G3B2:
			*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<3>(r) << 5) | (unorm<3>(g) << 2) | (unorm<2>(b) << 0);
			break;
		case FORMAT_X4R4G4B4:
			*(unsigned short*)element = 0xF000 | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
			break;
		case FORMAT_A4R4G4B4:
			*(unsigned short*)element = (unorm<4>(a) << 12) | (unorm<4>(r) << 8) | (unorm<4>(g) << 4) | (unorm<4>(b) << 0);
			break;
		case FORMAT_R4G4B4A4:
			*(unsigned short*)element = (unorm<4>(r) << 12) | (unorm<4>(g) << 8) | (unorm<4>(b) << 4) | (unorm<4>(a) << 0);
			break;
		case FORMAT_R5G6B5:
			*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<6>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_A1R5G5B5:
			*(unsigned short*)element = (unorm<1>(a) << 15) | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_R5G5B5A1:
			*(unsigned short*)element = (unorm<5>(r) << 11) | (unorm<5>(g) << 6) | (unorm<5>(b) << 1) | (unorm<1>(a) << 0);
			break;
		case FORMAT_X1R5G5B5:
			*(unsigned short*)element = 0x8000 | (unorm<5>(r) << 10) | (unorm<5>(g) << 5) | (unorm<5>(b) << 0);
			break;
		case FORMAT_A8R8G8B8:
			*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
			break;
		case FORMAT_X8R8G8B8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(r) << 16) | (unorm<8>(g) << 8) | (unorm<8>(b) << 0);
			break;
		case FORMAT_A8B8G8R8_SNORM:
			*(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(a)) << 24) |
			                          (static_cast<unsigned int>(snorm<8>(b)) << 16) |
			                          (static_cast<unsigned int>(snorm<8>(g)) << 8) |
			                          (static_cast<unsigned int>(snorm<8>(r)) << 0);
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_SRGB8_A8:
			*(unsigned int*)element = (unorm<8>(a) << 24) | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_A8B8G8R8I:
			*(unsigned int*)element = (static_cast<unsigned int>(scast<8>(a)) << 24) |
			                          (static_cast<unsigned int>(scast<8>(b)) << 16) |
			                          (static_cast<unsigned int>(scast<8>(g)) << 8) |
			                          (static_cast<unsigned int>(scast<8>(r)) << 0);
			break;
		case FORMAT_A8B8G8R8UI:
			*(unsigned int*)element = (ucast<8>(a) << 24) | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_X8B8G8R8_SNORM:
			*(unsigned int*)element = 0x7F000000 |
			                          (static_cast<unsigned int>(snorm<8>(b)) << 16) |
			                          (static_cast<unsigned int>(snorm<8>(g)) << 8) |
			                          (static_cast<unsigned int>(snorm<8>(r)) << 0);
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_SRGB8_X8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_X8B8G8R8I:
			*(unsigned int*)element = 0x7F000000 |
			                          (static_cast<unsigned int>(scast<8>(b)) << 16) |
			                          (static_cast<unsigned int>(scast<8>(g)) << 8) |
			                          (static_cast<unsigned int>(scast<8>(r)) << 0);
			break;
		case FORMAT_X8B8G8R8UI:
			*(unsigned int*)element = 0xFF000000 | (ucast<8>(b) << 16) | (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_A2R10G10B10:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(r) << 20) | (unorm<10>(g) << 10) | (unorm<10>(b) << 0);
			break;
		case FORMAT_A2B10G10R10:
		case FORMAT_A2B10G10R10UI:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (unorm<10>(b) << 20) | (unorm<10>(g) << 10) | (unorm<10>(r) << 0);
			break;
		case FORMAT_G8R8_SNORM:
			*(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(g)) << 8) |
			                            (static_cast<unsigned short>(snorm<8>(r)) << 0);
			break;
		case FORMAT_G8R8:
			*(unsigned short*)element = (unorm<8>(g) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_G8R8I:
			*(unsigned short*)element = (static_cast<unsigned short>(scast<8>(g)) << 8) |
			                            (static_cast<unsigned short>(scast<8>(r)) << 0);
			break;
		case FORMAT_G8R8UI:
			*(unsigned short*)element = (ucast<8>(g) << 8) | (ucast<8>(r) << 0);
			break;
		case FORMAT_G16R16:
			*(unsigned int*)element = (unorm<16>(g) << 16) | (unorm<16>(r) << 0);
			break;
		case FORMAT_G16R16I:
			*(unsigned int*)element = (static_cast<unsigned int>(scast<16>(g)) << 16) |
			                          (static_cast<unsigned int>(scast<16>(r)) << 0);
			break;
		case FORMAT_G16R16UI:
			*(unsigned int*)element = (ucast<16>(g) << 16) | (ucast<16>(r) << 0);
			break;
		case FORMAT_G32R32I:
		case FORMAT_G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			break;
		case FORMAT_A16B16G16R16:
			((unsigned short*)element)[0] = unorm<16>(r);
			((unsigned short*)element)[1] = unorm<16>(g);
			((unsigned short*)element)[2] = unorm<16>(b);
			((unsigned short*)element)[3] = unorm<16>(a);
			break;
		case FORMAT_A16B16G16R16I:
			((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
			((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(a));
			break;
		case FORMAT_A16B16G16R16UI:
			((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
			((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(a));
			break;
		case FORMAT_X16B16G16R16I:
			((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(b));
			break;
		case FORMAT_X16B16G16R16UI:
			((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(r));
			((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(g));
			((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(b));
			break;
		case FORMAT_A32B32G32R32I:
		case FORMAT_A32B32G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			((unsigned int*)element)[2] = static_cast<unsigned int>(b);
			((unsigned int*)element)[3] = static_cast<unsigned int>(a);
			break;
		case FORMAT_X32B32G32R32I:
		case FORMAT_X32B32G32R32UI:
			((unsigned int*)element)[0] = static_cast<unsigned int>(r);
			((unsigned int*)element)[1] = static_cast<unsigned int>(g);
			((unsigned int*)element)[2] = static_cast<unsigned int>(b);
			break;
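		// The bump-map formats below store their vector components as signed (snorm)
		// values; any luminance or alpha bits remain unsigned.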
		case FORMAT_V8U8:
			*(unsigned short*)element = (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
		case FORMAT_L6V5U5:
			*(unsigned short*)element = (unorm<6>(b) << 10) | (snorm<5>(g) << 5) | (snorm<5>(r) << 0);
			break;
		case FORMAT_Q8W8V8U8:
			*(unsigned int*)element = (snorm<8>(a) << 24) | (snorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
		case FORMAT_X8L8V8U8:
			*(unsigned int*)element = 0xFF000000 | (unorm<8>(b) << 16) | (snorm<8>(g) << 8) | (snorm<8>(r) << 0);
			break;
		case FORMAT_V16U16:
			*(unsigned int*)element = (snorm<16>(g) << 16) | (snorm<16>(r) << 0);
			break;
		case FORMAT_A2W10V10U10:
			*(unsigned int*)element = (unorm<2>(a) << 30) | (snorm<10>(b) << 20) | (snorm<10>(g) << 10) | (snorm<10>(r) << 0);
			break;
		case FORMAT_A16W16V16U16:
			((unsigned short*)element)[0] = snorm<16>(r);
			((unsigned short*)element)[1] = snorm<16>(g);
			((unsigned short*)element)[2] = snorm<16>(b);
			((unsigned short*)element)[3] = unorm<16>(a);
			break;
		case FORMAT_Q16W16V16U16:
			((unsigned short*)element)[0] = snorm<16>(r);
			((unsigned short*)element)[1] = snorm<16>(g);
			((unsigned short*)element)[2] = snorm<16>(b);
			((unsigned short*)element)[3] = snorm<16>(a);
			break;
		case FORMAT_R8G8B8:
			((unsigned char*)element)[0] = unorm<8>(b);
			((unsigned char*)element)[1] = unorm<8>(g);
			((unsigned char*)element)[2] = unorm<8>(r);
			break;
		case FORMAT_B8G8R8:
			((unsigned char*)element)[0] = unorm<8>(r);
			((unsigned char*)element)[1] = unorm<8>(g);
			((unsigned char*)element)[2] = unorm<8>(b);
			break;
		case FORMAT_R16F:
			*(half*)element = (half)r;
			break;
		case FORMAT_A16F:
			*(half*)element = (half)a;
			break;
		case FORMAT_G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			break;
		case FORMAT_X16B16G16R16F_UNSIGNED:
			r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
			// Fall through to FORMAT_X16B16G16R16F.
		case FORMAT_X16B16G16R16F:
			((half*)element)[3] = 1.0f;
			// Fall through to FORMAT_B16G16R16F.
		case FORMAT_B16G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			((half*)element)[2] = (half)b;
			break;
		case FORMAT_A16B16G16R16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)g;
			((half*)element)[2] = (half)b;
			((half*)element)[3] = (half)a;
			break;
		case FORMAT_A32F:
			*(float*)element = a;
			break;
		case FORMAT_R32F:
			*(float*)element = r;
			break;
		case FORMAT_G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			break;
		case FORMAT_X32B32G32R32F_UNSIGNED:
			r = max(r, 0.0f); g = max(g, 0.0f); b = max(b, 0.0f);
			// Fall through to FORMAT_X32B32G32R32F.
		case FORMAT_X32B32G32R32F:
			((float*)element)[3] = 1.0f;
			// Fall through to FORMAT_B32G32R32F.
		case FORMAT_B32G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			((float*)element)[2] = b;
			break;
		case FORMAT_A32B32G32R32F:
			((float*)element)[0] = r;
			((float*)element)[1] = g;
			((float*)element)[2] = b;
			((float*)element)[3] = a;
			break;
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
			*((float*)element) = r;
			break;
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
			*((float*)element) = 1 - r;
			break;
		case FORMAT_S8:
			*((unsigned char*)element) = unorm<8>(r);
			break;
		case FORMAT_L8:
			*(unsigned char*)element = unorm<8>(r);
			break;
		case FORMAT_A4L4:
			*(unsigned char*)element = (unorm<4>(a) << 4) | (unorm<4>(r) << 0);
			break;
		case FORMAT_L16:
			*(unsigned short*)element = unorm<16>(r);
			break;
		case FORMAT_A8L8:
			*(unsigned short*)element = (unorm<8>(a) << 8) | (unorm<8>(r) << 0);
			break;
		case FORMAT_L16F:
			*(half*)element = (half)r;
			break;
		case FORMAT_A16L16F:
			((half*)element)[0] = (half)r;
			((half*)element)[1] = (half)a;
			break;
		case FORMAT_L32F:
			*(float*)element = r;
			break;
		case FORMAT_A32L32F:
			((float*)element)[0] = r;
			((float*)element)[1] = a;
			break;
		default:
			ASSERT(false);
		}
	}

	Color<float> Surface::Buffer::read(int x, int y, int z) const
	{
		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB + z * samples * sliceB;

		return read(element);
	}

	Color<float> Surface::Buffer::read(int x, int y) const
	{
		void *element = (unsigned char*)buffer + (x + border) * bytes + (y + border) * pitchB;

		return read(element);
	}

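	// Decodes one element from 'format' into a float color; sRGB-encoded formats are
	// converted back to linear at the end.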
	inline Color<float> Surface::Buffer::read(void *element) const
	{
		float r = 0.0f;
		float g = 0.0f;
		float b = 0.0f;
		float a = 1.0f;

		switch(format)
		{
		case FORMAT_P8:
			{
				ASSERT(palette);

				unsigned int abgr = palette[*(unsigned char*)element];

				r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
				g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
			}
			break;
		case FORMAT_A8P8:
			{
				ASSERT(palette);

				unsigned int bgr = palette[((unsigned char*)element)[0]];

				r = (bgr & 0x000000FF) * (1.0f / 0x000000FF);
				g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			}
			break;
		case FORMAT_A8:
			r = 0;
			g = 0;
			b = 0;
			a = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_R8_SNORM:
			r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f);
			break;
		case FORMAT_R8:
			r = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_R8I:
			r = *(signed char*)element;
			break;
		case FORMAT_R8UI:
			r = *(unsigned char*)element;
			break;
		case FORMAT_R3G3B2:
			{
				unsigned char rgb = *(unsigned char*)element;

				r = (rgb & 0xE0) * (1.0f / 0xE0);
				g = (rgb & 0x1C) * (1.0f / 0x1C);
				b = (rgb & 0x03) * (1.0f / 0x03);
			}
			break;
		case FORMAT_A8R3G3B2:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0xFF00) * (1.0f / 0xFF00);
				r = (argb & 0x00E0) * (1.0f / 0x00E0);
				g = (argb & 0x001C) * (1.0f / 0x001C);
				b = (argb & 0x0003) * (1.0f / 0x0003);
			}
			break;
		case FORMAT_X4R4G4B4:
			{
				unsigned short rgb = *(unsigned short*)element;

				r = (rgb & 0x0F00) * (1.0f / 0x0F00);
				g = (rgb & 0x00F0) * (1.0f / 0x00F0);
				b = (rgb & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_A4R4G4B4:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0xF000) * (1.0f / 0xF000);
				r = (argb & 0x0F00) * (1.0f / 0x0F00);
				g = (argb & 0x00F0) * (1.0f / 0x00F0);
				b = (argb & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_R4G4B4A4:
			{
				unsigned short rgba = *(unsigned short*)element;

				r = (rgba & 0xF000) * (1.0f / 0xF000);
				g = (rgba & 0x0F00) * (1.0f / 0x0F00);
				b = (rgba & 0x00F0) * (1.0f / 0x00F0);
				a = (rgba & 0x000F) * (1.0f / 0x000F);
			}
			break;
		case FORMAT_R5G6B5:
			{
				unsigned short rgb = *(unsigned short*)element;

				r = (rgb & 0xF800) * (1.0f / 0xF800);
				g = (rgb & 0x07E0) * (1.0f / 0x07E0);
				b = (rgb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_A1R5G5B5:
			{
				unsigned short argb = *(unsigned short*)element;

				a = (argb & 0x8000) * (1.0f / 0x8000);
				r = (argb & 0x7C00) * (1.0f / 0x7C00);
				g = (argb & 0x03E0) * (1.0f / 0x03E0);
				b = (argb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_R5G5B5A1:
			{
				unsigned short rgba = *(unsigned short*)element;

				r = (rgba & 0xF800) * (1.0f / 0xF800);
				g = (rgba & 0x07C0) * (1.0f / 0x07C0);
				b = (rgba & 0x003E) * (1.0f / 0x003E);
				a = (rgba & 0x0001) * (1.0f / 0x0001);
			}
			break;
		case FORMAT_X1R5G5B5:
			{
				unsigned short xrgb = *(unsigned short*)element;

				r = (xrgb & 0x7C00) * (1.0f / 0x7C00);
				g = (xrgb & 0x03E0) * (1.0f / 0x03E0);
				b = (xrgb & 0x001F) * (1.0f / 0x001F);
			}
			break;
		case FORMAT_A8R8G8B8:
			{
				unsigned int argb = *(unsigned int*)element;

				a = (argb & 0xFF000000) * (1.0f / 0xFF000000);
				r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (argb & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_X8R8G8B8:
			{
				unsigned int xrgb = *(unsigned int*)element;

				r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00);
				b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_A8B8G8R8_SNORM:
			{
				signed char* abgr = (signed char*)element;

				r = max(abgr[0] * (1.0f / 0x7F), -1.0f);
				g = max(abgr[1] * (1.0f / 0x7F), -1.0f);
				b = max(abgr[2] * (1.0f / 0x7F), -1.0f);
				a = max(abgr[3] * (1.0f / 0x7F), -1.0f);
			}
			break;
		case FORMAT_A8B8G8R8:
		case FORMAT_SRGB8_A8:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = (abgr & 0xFF000000) * (1.0f / 0xFF000000);
				b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				r = (abgr & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_A8B8G8R8I:
			{
				signed char* abgr = (signed char*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_A8B8G8R8UI:
			{
				unsigned char* abgr = (unsigned char*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_X8B8G8R8_SNORM:
			{
				signed char* bgr = (signed char*)element;

				r = max(bgr[0] * (1.0f / 0x7F), -1.0f);
				g = max(bgr[1] * (1.0f / 0x7F), -1.0f);
				b = max(bgr[2] * (1.0f / 0x7F), -1.0f);
			}
			break;
		case FORMAT_X8B8G8R8:
		case FORMAT_SRGB8_X8:
			{
				unsigned int xbgr = *(unsigned int*)element;

				b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000);
				g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00);
				r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF);
			}
			break;
		case FORMAT_X8B8G8R8I:
			{
				signed char* bgr = (signed char*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_X8B8G8R8UI:
			{
				unsigned char* bgr = (unsigned char*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_G8R8_SNORM:
			{
				signed char* gr = (signed char*)element;

				r = (gr[0] & 0xFF00) * (1.0f / 0xFF00);
				g = (gr[1] & 0x00FF) * (1.0f / 0x00FF);
			}
			break;
		case FORMAT_G8R8:
			{
				unsigned short gr = *(unsigned short*)element;

				g = (gr & 0xFF00) * (1.0f / 0xFF00);
				r = (gr & 0x00FF) * (1.0f / 0x00FF);
			}
			break;
		case FORMAT_G8R8I:
			{
				signed char* gr = (signed char*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_G8R8UI:
			{
				unsigned char* gr = (unsigned char*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_R16I:
			r = *((short*)element);
			break;
		case FORMAT_R16UI:
			r = *((unsigned short*)element);
			break;
		case FORMAT_G16R16I:
			{
				short* gr = (short*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_G16R16:
			{
				unsigned int gr = *(unsigned int*)element;

				g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000);
				r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF);
			}
			break;
		case FORMAT_G16R16UI:
			{
				unsigned short* gr = (unsigned short*)element;

				r = gr[0];
				g = gr[1];
			}
			break;
		case FORMAT_A2R10G10B10:
			{
				unsigned int argb = *(unsigned int*)element;

				a = (argb & 0xC0000000) * (1.0f / 0xC0000000);
				r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000);
				g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00);
				b = (argb & 0x000003FF) * (1.0f / 0x000003FF);
			}
			break;
		case FORMAT_A2B10G10R10:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = (abgr & 0xC0000000) * (1.0f / 0xC0000000);
				b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000);
				g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00);
				r = (abgr & 0x000003FF) * (1.0f / 0x000003FF);
			}
			break;
		case FORMAT_A2B10G10R10UI:
			{
				unsigned int abgr = *(unsigned int*)element;

				a = static_cast<float>((abgr & 0xC0000000) >> 30);
				b = static_cast<float>((abgr & 0x3FF00000) >> 20);
				g = static_cast<float>((abgr & 0x000FFC00) >> 10);
				r = static_cast<float>(abgr & 0x000003FF);
			}
			break;
		case FORMAT_A16B16G16R16I:
			{
				short* abgr = (short*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_A16B16G16R16:
			r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF);
			g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF);
			b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF);
			a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
			break;
		case FORMAT_A16B16G16R16UI:
			{
				unsigned short* abgr = (unsigned short*)element;

				r = abgr[0];
				g = abgr[1];
				b = abgr[2];
				a = abgr[3];
			}
			break;
		case FORMAT_X16B16G16R16I:
			{
				short* bgr = (short*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_X16B16G16R16UI:
			{
				unsigned short* bgr = (unsigned short*)element;

				r = bgr[0];
				g = bgr[1];
				b = bgr[2];
			}
			break;
		case FORMAT_A32B32G32R32I:
			{
				int* abgr = (int*)element;

				r = static_cast<float>(abgr[0]);
				g = static_cast<float>(abgr[1]);
				b = static_cast<float>(abgr[2]);
				a = static_cast<float>(abgr[3]);
			}
			break;
		case FORMAT_A32B32G32R32UI:
			{
				unsigned int* abgr = (unsigned int*)element;

				r = static_cast<float>(abgr[0]);
				g = static_cast<float>(abgr[1]);
				b = static_cast<float>(abgr[2]);
				a = static_cast<float>(abgr[3]);
			}
			break;
		case FORMAT_X32B32G32R32I:
			{
				int* bgr = (int*)element;

				r = static_cast<float>(bgr[0]);
				g = static_cast<float>(bgr[1]);
				b = static_cast<float>(bgr[2]);
			}
			break;
		case FORMAT_X32B32G32R32UI:
			{
				unsigned int* bgr = (unsigned int*)element;

				r = static_cast<float>(bgr[0]);
				g = static_cast<float>(bgr[1]);
				b = static_cast<float>(bgr[2]);
			}
			break;
		case FORMAT_G32R32I:
			{
				int* gr = (int*)element;

				r = static_cast<float>(gr[0]);
				g = static_cast<float>(gr[1]);
			}
			break;
		case FORMAT_G32R32UI:
			{
				unsigned int* gr = (unsigned int*)element;

				r = static_cast<float>(gr[0]);
				g = static_cast<float>(gr[1]);
			}
			break;
		case FORMAT_R32I:
			r = static_cast<float>(*((int*)element));
			break;
		case FORMAT_R32UI:
			r = static_cast<float>(*((unsigned int*)element));
			break;
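		// The signed bump-map formats below sign-extend each component into the high
		// bits of an int and then normalize by the positive maximum, mapping the
		// stored values onto roughly [-1, 1].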
		case FORMAT_V8U8:
			{
				unsigned short vu = *(unsigned short*)element;

				r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000);
			}
			break;
		case FORMAT_L6V5U5:
			{
				unsigned short lvu = *(unsigned short*)element;

				r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000);
				g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000);
				b = (lvu & 0xFC00) * (1.0f / 0xFC00);
			}
			break;
		case FORMAT_Q8W8V8U8:
			{
				unsigned int qwvu = *(unsigned int*)element;

				r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
				b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000);
				a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000);
			}
			break;
		case FORMAT_X8L8V8U8:
			{
				unsigned int xlvu = *(unsigned int*)element;

				r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000);
				g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000);
				b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000);
			}
			break;
		case FORMAT_R8G8B8:
			r = ((unsigned char*)element)[2] * (1.0f / 0xFF);
			g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			break;
		case FORMAT_B8G8R8:
			r = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			g = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			b = ((unsigned char*)element)[2] * (1.0f / 0xFF);
			break;
		case FORMAT_V16U16:
			{
				unsigned int vu = *(unsigned int*)element;

				r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000);
				g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000);
			}
			break;
		case FORMAT_A2W10V10U10:
			{
				unsigned int awvu = *(unsigned int*)element;

				r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000);
				g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000);
				b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000);
				a = (awvu & 0xC0000000) * (1.0f / 0xC0000000);
			}
			break;
		case FORMAT_A16W16V16U16:
			r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
			g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
			b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
			a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF);
			break;
		case FORMAT_Q16W16V16U16:
			r = ((signed short*)element)[0] * (1.0f / 0x7FFF);
			g = ((signed short*)element)[1] * (1.0f / 0x7FFF);
			b = ((signed short*)element)[2] * (1.0f / 0x7FFF);
			a = ((signed short*)element)[3] * (1.0f / 0x7FFF);
			break;
		case FORMAT_L8:
			r =
			g =
			b = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		case FORMAT_A4L4:
			{
				unsigned char al = *(unsigned char*)element;

				r =
				g =
				b = (al & 0x0F) * (1.0f / 0x0F);
				a = (al & 0xF0) * (1.0f / 0xF0);
			}
			break;
		case FORMAT_L16:
			r =
			g =
			b = *(unsigned short*)element * (1.0f / 0xFFFF);
			break;
		case FORMAT_A8L8:
			r =
			g =
			b = ((unsigned char*)element)[0] * (1.0f / 0xFF);
			a = ((unsigned char*)element)[1] * (1.0f / 0xFF);
			break;
		case FORMAT_L16F:
			r =
			g =
			b = *(half*)element;
			break;
		case FORMAT_A16L16F:
			r =
			g =
			b = ((half*)element)[0];
			a = ((half*)element)[1];
			break;
		case FORMAT_L32F:
			r =
			g =
			b = *(float*)element;
			break;
		case FORMAT_A32L32F:
			r =
			g =
			b = ((float*)element)[0];
			a = ((float*)element)[1];
			break;
		case FORMAT_A16F:
			a = *(half*)element;
			break;
		case FORMAT_R16F:
			r = *(half*)element;
			break;
		case FORMAT_G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			break;
		case FORMAT_X16B16G16R16F:
		case FORMAT_X16B16G16R16F_UNSIGNED:
		case FORMAT_B16G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			b = ((half*)element)[2];
			break;
		case FORMAT_A16B16G16R16F:
			r = ((half*)element)[0];
			g = ((half*)element)[1];
			b = ((half*)element)[2];
			a = ((half*)element)[3];
			break;
		case FORMAT_A32F:
			a = *(float*)element;
			break;
		case FORMAT_R32F:
			r = *(float*)element;
			break;
		case FORMAT_G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			break;
		case FORMAT_X32B32G32R32F:
		case FORMAT_X32B32G32R32F_UNSIGNED:
		case FORMAT_B32G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			b = ((float*)element)[2];
			break;
		case FORMAT_A32B32G32R32F:
			r = ((float*)element)[0];
			g = ((float*)element)[1];
			b = ((float*)element)[2];
			a = ((float*)element)[3];
			break;
		case FORMAT_D32F:
		case FORMAT_D32FS8:
		case FORMAT_D32F_LOCKABLE:
		case FORMAT_D32FS8_TEXTURE:
		case FORMAT_D32F_SHADOW:
		case FORMAT_D32FS8_SHADOW:
			r = *(float*)element;
			g = r;
			b = r;
			a = r;
			break;
		case FORMAT_D32F_COMPLEMENTARY:
		case FORMAT_D32FS8_COMPLEMENTARY:
			r = 1.0f - *(float*)element;
			g = r;
			b = r;
			a = r;
			break;
		case FORMAT_S8:
			r = *(unsigned char*)element * (1.0f / 0xFF);
			break;
		default:
			ASSERT(false);
		}

		if(isSRGBformat(format))
		{
			r = sRGBtoLinear(r);
			g = sRGBtoLinear(g);
			b = sRGBtoLinear(b);
		}

		return Color<float>(r, g, b, a);
	}

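	// Samples the volume at floating-point coordinates using trilinear interpolation
	// of the eight nearest texels, clamped to the buffer's edges.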
	Color<float> Surface::Buffer::sample(float x, float y, float z) const
	{
		x -= 0.5f;
		y -= 0.5f;
		z -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		int z0 = clamp((int)z, 0, depth - 1);
		int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;

		Color<float> c000 = read(x0, y0, z0);
		Color<float> c100 = read(x1, y0, z0);
		Color<float> c010 = read(x0, y1, z0);
		Color<float> c110 = read(x1, y1, z0);
		Color<float> c001 = read(x0, y0, z1);
		Color<float> c101 = read(x1, y0, z1);
		Color<float> c011 = read(x0, y1, z1);
		Color<float> c111 = read(x1, y1, z1);

		float fx = x - x0;
		float fy = y - y0;
		float fz = z - z0;

		c000 *= (1 - fx) * (1 - fy) * (1 - fz);
		c100 *= fx * (1 - fy) * (1 - fz);
		c010 *= (1 - fx) * fy * (1 - fz);
		c110 *= fx * fy * (1 - fz);
		c001 *= (1 - fx) * (1 - fy) * fz;
		c101 *= fx * (1 - fy) * fz;
		c011 *= (1 - fx) * fy * fz;
		c111 *= fx * fy * fz;

		return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
	}

	Color<float> Surface::Buffer::sample(float x, float y, int layer) const
	{
		x -= 0.5f;
		y -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		Color<float> c00 = read(x0, y0, layer);
		Color<float> c10 = read(x1, y0, layer);
		Color<float> c01 = read(x0, y1, layer);
		Color<float> c11 = read(x1, y1, layer);

		float fx = x - x0;
		float fy = y - y0;

		c00 *= (1 - fx) * (1 - fy);
		c10 *= fx * (1 - fy);
		c01 *= (1 - fx) * fy;
		c11 *= fx * fy;

		return c00 + c10 + c01 + c11;
	}

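	// Records the lock mode and returns the address of element (x, y, z); for
	// block-compressed formats the offset is computed per block rather than per pixel.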
	void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
	{
		this->lock = lock;

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
		case LOCK_UPDATE:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirty = true;
			break;
		default:
			ASSERT(false);
		}

		if(buffer)
		{
			x += border;
			y += border;

			switch(format)
			{
			case FORMAT_DXT1:
			case FORMAT_ATI1:
			case FORMAT_ETC1:
			case FORMAT_R11_EAC:
			case FORMAT_SIGNED_R11_EAC:
			case FORMAT_RGB8_ETC2:
			case FORMAT_SRGB8_ETC2:
			case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
				return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RG11_EAC:
			case FORMAT_SIGNED_RG11_EAC:
			case FORMAT_RGBA8_ETC2_EAC:
			case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
			case FORMAT_RGBA_ASTC_4x4_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_5x4_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
				return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_5x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_6x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_6x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x8_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x8_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x10_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_12x10_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
				return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_12x12_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
				return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
			case FORMAT_DXT3:
			case FORMAT_DXT5:
			case FORMAT_ATI2:
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			default:
				return (unsigned char*)buffer + x * bytes + y * pitchB + z * samples * sliceB;
			}
		}

		return nullptr;
	}

	void Surface::Buffer::unlockRect()
	{
		lock = LOCK_UNLOCKED;
	}

	class SurfaceImplementation : public Surface
	{
	public:
		SurfaceImplementation(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
			: Surface(width, height, depth, format, pixels, pitch, slice) {}
		SurfaceImplementation(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchP = 0)
			: Surface(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchP) {}
		~SurfaceImplementation() override {};

		void *lockInternal(int x, int y, int z, Lock lock, Accessor client) override
		{
			return Surface::lockInternal(x, y, z, lock, client);
		}

		void unlockInternal() override
		{
			Surface::unlockInternal();
		}
	};

	Surface *Surface::create(int width, int height, int depth, Format format, void *pixels, int pitch, int slice)
	{
		return new SurfaceImplementation(width, height, depth, format, pixels, pitch, slice);
	}

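	// Creates a surface owned by a texture resource or by the renderer; pixel storage
	// is allocated lazily on first lock.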
	Surface *Surface::create(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided)
	{
		return new SurfaceImplementation(texture, width, height, depth, border, samples, format, lockable, renderTarget, pitchPprovided);
	}

	Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
	{
		resource = new Resource(0);
		hasParent = false;
		ownExternal = false;
		depth = max(1, depth);

		external.buffer = pixels;
		external.width = width;
		external.height = height;
		external.depth = depth;
		external.samples = 1;
		external.format = format;
		external.bytes = bytes(external.format);
		external.pitchB = pitch;
		external.pitchP = external.bytes ? pitch / external.bytes : 0;
		external.sliceB = slice;
		external.sliceP = external.bytes ? slice / external.bytes : 0;
		external.border = 0;
		external.lock = LOCK_UNLOCKED;
		external.dirty = true;

		internal.buffer = nullptr;
		internal.width = width;
		internal.height = height;
		internal.depth = depth;
		internal.samples = 1;
		internal.format = selectInternalFormat(format);
		internal.bytes = bytes(internal.format);
		internal.pitchB = pitchB(internal.width, 0, internal.format, false);
		internal.pitchP = pitchP(internal.width, 0, internal.format, false);
		internal.sliceB = sliceB(internal.width, internal.height, 0, internal.format, false);
		internal.sliceP = sliceP(internal.width, internal.height, 0, internal.format, false);
		internal.border = 0;
		internal.lock = LOCK_UNLOCKED;
		internal.dirty = false;

		stencil.buffer = nullptr;
		stencil.width = width;
		stencil.height = height;
		stencil.depth = depth;
		stencil.samples = 1;
		stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
		stencil.bytes = bytes(stencil.format);
		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, false);
		stencil.pitchP = pitchP(stencil.width, 0, stencil.format, false);
		stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, false);
		stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, false);
		stencil.border = 0;
		stencil.lock = LOCK_UNLOCKED;
		stencil.dirty = false;

		dirtyContents = true;
		paletteUsed = 0;
	}

	Surface::Surface(Resource *texture, int width, int height, int depth, int border, int samples, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget)
	{
		resource = texture ? texture : new Resource(0);
		hasParent = texture != nullptr;
		ownExternal = true;
		depth = max(1, depth);
		samples = max(1, samples);

		external.buffer = nullptr;
		external.width = width;
		external.height = height;
		external.depth = depth;
		external.samples = (short)samples;
		external.format = format;
		external.bytes = bytes(external.format);
		external.pitchB = pitchB(external.width, 0, external.format, renderTarget && !texture);
		external.pitchP = pitchP(external.width, 0, external.format, renderTarget && !texture);
		external.sliceB = sliceB(external.width, external.height, 0, external.format, renderTarget && !texture);
		external.sliceP = sliceP(external.width, external.height, 0, external.format, renderTarget && !texture);
		external.border = 0;
		external.lock = LOCK_UNLOCKED;
		external.dirty = false;

		internal.buffer = nullptr;
		internal.width = width;
		internal.height = height;
		internal.depth = depth;
		internal.samples = (short)samples;
		internal.format = selectInternalFormat(format);
		internal.bytes = bytes(internal.format);
		internal.pitchB = !pitchPprovided ? pitchB(internal.width, border, internal.format, renderTarget) : pitchPprovided * internal.bytes;
		internal.pitchP = !pitchPprovided ? pitchP(internal.width, border, internal.format, renderTarget) : pitchPprovided;
		internal.sliceB = sliceB(internal.width, internal.height, border, internal.format, renderTarget);
		internal.sliceP = sliceP(internal.width, internal.height, border, internal.format, renderTarget);
		internal.border = (short)border;
		internal.lock = LOCK_UNLOCKED;
		internal.dirty = false;

		stencil.buffer = nullptr;
		stencil.width = width;
		stencil.height = height;
		stencil.depth = depth;
		stencil.samples = (short)samples;
		stencil.format = isStencil(format) ? FORMAT_S8 : FORMAT_NULL;
		stencil.bytes = bytes(stencil.format);
		stencil.pitchB = pitchB(stencil.width, 0, stencil.format, renderTarget);
		stencil.pitchP = pitchP(stencil.width, 0, stencil.format, renderTarget);
		stencil.sliceB = sliceB(stencil.width, stencil.height, 0, stencil.format, renderTarget);
		stencil.sliceP = sliceP(stencil.width, stencil.height, 0, stencil.format, renderTarget);
		stencil.border = 0;
		stencil.lock = LOCK_UNLOCKED;
		stencil.dirty = false;

		dirtyContents = true;
		paletteUsed = 0;
	}

	Surface::~Surface()
	{
		// sync() must be called before this destructor to ensure all locks have been released.
		// We can't call it here because the parent resource may already have been destroyed.
		ASSERT(isUnlocked());

		if(!hasParent)
		{
			resource->destruct();
		}

		if(ownExternal)
		{
			deallocate(external.buffer);
		}

		if(internal.buffer != external.buffer)
		{
			deallocate(internal.buffer);
		}

		deallocate(stencil.buffer);

		external.buffer = 0;
		internal.buffer = 0;
		stencil.buffer = 0;
	}

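	// Locks the external (client-format) buffer; if the internal copy holds newer
	// data it is converted back to the external format first, unless the lock
	// discards the contents.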
	void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
	{
		resource->lock(client);

		if(!external.buffer)
		{
			if(internal.buffer && identicalFormats())
			{
				external.buffer = internal.buffer;
			}
			else
			{
				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.border, external.samples, external.format);
			}
		}

		if(internal.dirty)
		{
			if(lock != LOCK_DISCARD)
			{
				update(external, internal);
			}

			internal.dirty = false;
		}

		switch(lock)
		{
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyContents = true;
			break;
		default:
			ASSERT(false);
		}

		return external.lockRect(x, y, z, lock);
	}

	void Surface::unlockExternal()
	{
		external.unlockRect();

		resource->unlock();
	}

	void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
	{
		if(lock != LOCK_UNLOCKED)
		{
			resource->lock(client);
		}

		if(!internal.buffer)
		{
			if(external.buffer && identicalFormats())
			{
				internal.buffer = external.buffer;
			}
			else
			{
				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.border, internal.samples, internal.format);
			}
		}

		// FIXME: WHQL requires conversion to lower external precision and back
		if(logPrecision >= WHQL)
		{
			if(internal.dirty && renderTarget && internal.format != external.format)
			{
				if(lock != LOCK_DISCARD)
				{
					switch(external.format)
					{
					case FORMAT_R3G3B2:
					case FORMAT_A8R3G3B2:
					case FORMAT_A1R5G5B5:
					case FORMAT_A2R10G10B10:
					case FORMAT_A2B10G10R10:
						lockExternal(0, 0, 0, LOCK_READWRITE, client);
						unlockExternal();
						break;
					default:
						// Difference passes WHQL
						break;
					}
				}
			}
		}

		if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
		{
			if(lock != LOCK_DISCARD)
			{
				update(internal, external);
			}

			external.dirty = false;
			paletteUsed = Surface::paletteID;
		}

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyContents = true;
			break;
		default:
			ASSERT(false);
		}

		if(lock == LOCK_READONLY && client == PUBLIC)
		{
			resolve();
		}

		return internal.lockRect(x, y, z, lock);
	}

	void Surface::unlockInternal()
	{
		internal.unlockRect();

		resource->unlock();
	}

	void *Surface::lockStencil(int x, int y, int front, Accessor client)
	{
		if(stencil.format == FORMAT_NULL)
		{
			return nullptr;
		}

		resource->lock(client);

		if(!stencil.buffer)
		{
			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.border, stencil.samples, stencil.format);
		}

		return stencil.lockRect(x, y, front, LOCK_READWRITE); // FIXME
	}

	void Surface::unlockStencil()
	{
		stencil.unlockRect();

		resource->unlock();
	}

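	// Storage size of one pixel in bytes. For the 4x4 block-compressed formats the
	// value is the byte size of a one-pixel-wide column of a block (see the comments
	// below); ASTC returns 0 and is handled entirely in pitchB()/sliceB().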
	int Surface::bytes(Format format)
	{
		switch(format)
		{
		case FORMAT_NULL:                return 0;
		case FORMAT_P8:                  return 1;
		case FORMAT_A8P8:                return 2;
		case FORMAT_A8:                  return 1;
		case FORMAT_R8I:                 return 1;
		case FORMAT_R8:                  return 1;
		case FORMAT_R3G3B2:              return 1;
		case FORMAT_R16I:                return 2;
		case FORMAT_R16UI:               return 2;
		case FORMAT_A8R3G3B2:            return 2;
		case FORMAT_R5G6B5:              return 2;
		case FORMAT_A1R5G5B5:            return 2;
		case FORMAT_X1R5G5B5:            return 2;
		case FORMAT_R5G5B5A1:            return 2;
		case FORMAT_X4R4G4B4:            return 2;
		case FORMAT_A4R4G4B4:            return 2;
		case FORMAT_R4G4B4A4:            return 2;
		case FORMAT_R8G8B8:              return 3;
		case FORMAT_B8G8R8:              return 3;
		case FORMAT_R32I:                return 4;
		case FORMAT_R32UI:               return 4;
		case FORMAT_X8R8G8B8:            return 4;
	//	case FORMAT_X8G8R8B8Q:           return 4;
		case FORMAT_A8R8G8B8:            return 4;
	//	case FORMAT_A8G8R8B8Q:           return 4;
		case FORMAT_X8B8G8R8I:           return 4;
		case FORMAT_X8B8G8R8:            return 4;
		case FORMAT_SRGB8_X8:            return 4;
		case FORMAT_SRGB8_A8:            return 4;
		case FORMAT_A8B8G8R8I:           return 4;
		case FORMAT_R8UI:                return 1;
		case FORMAT_G8R8UI:              return 2;
		case FORMAT_X8B8G8R8UI:          return 4;
		case FORMAT_A8B8G8R8UI:          return 4;
		case FORMAT_A8B8G8R8:            return 4;
		case FORMAT_R8_SNORM:            return 1;
		case FORMAT_G8R8_SNORM:          return 2;
		case FORMAT_X8B8G8R8_SNORM:      return 4;
		case FORMAT_A8B8G8R8_SNORM:      return 4;
		case FORMAT_A2R10G10B10:         return 4;
		case FORMAT_A2B10G10R10:         return 4;
		case FORMAT_A2B10G10R10UI:       return 4;
		case FORMAT_G8R8I:               return 2;
		case FORMAT_G8R8:                return 2;
		case FORMAT_G16R16I:             return 4;
		case FORMAT_G16R16UI:            return 4;
		case FORMAT_G16R16:              return 4;
		case FORMAT_G32R32I:             return 8;
		case FORMAT_G32R32UI:            return 8;
		case FORMAT_X16B16G16R16I:       return 8;
		case FORMAT_X16B16G16R16UI:      return 8;
		case FORMAT_A16B16G16R16I:       return 8;
		case FORMAT_A16B16G16R16UI:      return 8;
		case FORMAT_A16B16G16R16:        return 8;
		case FORMAT_X32B32G32R32I:       return 16;
		case FORMAT_X32B32G32R32UI:      return 16;
		case FORMAT_A32B32G32R32I:       return 16;
		case FORMAT_A32B32G32R32UI:      return 16;
		// Compressed formats
		case FORMAT_DXT1:                return 2;   // Column of four pixels
		case FORMAT_DXT3:                return 4;   // Column of four pixels
		case FORMAT_DXT5:                return 4;   // Column of four pixels
		case FORMAT_ATI1:                return 2;   // Column of four pixels
		case FORMAT_ATI2:                return 4;   // Column of four pixels
		case FORMAT_ETC1:                return 2;   // Column of four pixels
		case FORMAT_R11_EAC:             return 2;
		case FORMAT_SIGNED_R11_EAC:      return 2;
		case FORMAT_RG11_EAC:            return 4;
		case FORMAT_SIGNED_RG11_EAC:     return 4;
		case FORMAT_RGB8_ETC2:           return 2;
		case FORMAT_SRGB8_ETC2:          return 2;
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:  return 2;
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_RGBA8_ETC2_EAC:          return 4;
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:   return 4;
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0;   // FIXME
		// Bumpmap formats
		case FORMAT_V8U8:                return 2;
		case FORMAT_L6V5U5:              return 2;
		case FORMAT_Q8W8V8U8:            return 4;
		case FORMAT_X8L8V8U8:            return 4;
		case FORMAT_A2W10V10U10:         return 4;
		case FORMAT_V16U16:              return 4;
		case FORMAT_A16W16V16U16:        return 8;
		case FORMAT_Q16W16V16U16:        return 8;
		// Luminance formats
		case FORMAT_L8:                  return 1;
		case FORMAT_A4L4:                return 1;
		case FORMAT_L16:                 return 2;
		case FORMAT_A8L8:                return 2;
		case FORMAT_L16F:                return 2;
		case FORMAT_A16L16F:             return 4;
		case FORMAT_L32F:                return 4;
		case FORMAT_A32L32F:             return 8;
		// Floating-point formats
		case FORMAT_A16F:                return 2;
		case FORMAT_R16F:                return 2;
		case FORMAT_G16R16F:             return 4;
		case FORMAT_B16G16R16F:          return 6;
		case FORMAT_X16B16G16R16F:       return 8;
		case FORMAT_A16B16G16R16F:       return 8;
		case FORMAT_X16B16G16R16F_UNSIGNED: return 8;
		case FORMAT_A32F:                return 4;
		case FORMAT_R32F:                return 4;
		case FORMAT_G32R32F:             return 8;
		case FORMAT_B32G32R32F:          return 12;
		case FORMAT_X32B32G32R32F:       return 16;
		case FORMAT_A32B32G32R32F:       return 16;
		case FORMAT_X32B32G32R32F_UNSIGNED: return 16;
		// Depth/stencil formats
		case FORMAT_D16:                 return 2;
		case FORMAT_D32:                 return 4;
		case FORMAT_D24X8:               return 4;
		case FORMAT_D24S8:               return 4;
		case FORMAT_D24FS8:              return 4;
		case FORMAT_D32F:                return 4;
		case FORMAT_D32FS8:              return 4;
		case FORMAT_D32F_COMPLEMENTARY:  return 4;
		case FORMAT_D32FS8_COMPLEMENTARY: return 4;
		case FORMAT_D32F_LOCKABLE:       return 4;
		case FORMAT_D32FS8_TEXTURE:      return 4;
		case FORMAT_D32F_SHADOW:         return 4;
		case FORMAT_D32FS8_SHADOW:       return 4;
		case FORMAT_DF24S8:              return 4;
		case FORMAT_DF16S8:              return 2;
		case FORMAT_INTZ:                return 4;
		case FORMAT_S8:                  return 1;
		case FORMAT_YV12_BT601:          return 1;   // Y plane only
		case FORMAT_YV12_BT709:          return 1;   // Y plane only
		case FORMAT_YV12_JFIF:           return 1;   // Y plane only
		default:
			ASSERT(false);
		}

		return 0;
	}

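	// Bytes per row of the image (or per row of blocks for compressed formats),
	// including the border, with the width rounded up to an even pixel count for
	// render targets and depth/stencil buffers.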
	int Surface::pitchB(int width, int border, Format format, bool target)
	{
		width += 2 * border;

		if(target || isDepth(format) || isStencil(format))
		{
			width = align(width, 2);
		}

		switch(format)
		{
		case FORMAT_DXT1:
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			return 8 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per 4 rows
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
			return 16 * ((width + 4) / 5);
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
			return 16 * ((width + 5) / 6);
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
			return 16 * ((width + 7) / 8);
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
			return 16 * ((width + 9) / 10);
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
			return 16 * ((width + 11) / 12);
		case FORMAT_DXT3:
		case FORMAT_DXT5:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_ATI1:
			return 2 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per row
		case FORMAT_ATI2:
			return 4 * ((width + 3) / 4);    // 128 bit per 4x4 block, computed per row
		case FORMAT_YV12_BT601:
		case FORMAT_YV12_BT709:
		case FORMAT_YV12_JFIF:
			return align(width, 16);
		default:
			return bytes(format) * width;
		}
	}

	int Surface::pitchP(int width, int border, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ? pitchB(width, border, format, target) / B : 0;
	}

	int Surface::sliceB(int width, int height, int border, Format format, bool target)
	{
		height += 2 * border;

		if(target || isDepth(format) || isStencil(format))
		{
			height = ((height + 1) & ~1);
		}

		switch(format)
		{
		case FORMAT_DXT1:
		case FORMAT_DXT3:
		case FORMAT_DXT5:
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
			return pitchB(width, border, format, target) * ((height + 3) / 4);    // Pitch computed per 4 rows
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
			return pitchB(width, border, format, target) * ((height + 4) / 5);    // Pitch computed per 5 rows
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
			return pitchB(width, border, format, target) * ((height + 5) / 6);    // Pitch computed per 6 rows
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
			return pitchB(width, border, format, target) * ((height + 7) / 8);    // Pitch computed per 8 rows
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
			return pitchB(width, border, format, target) * ((height + 9) / 10);   // Pitch computed per 10 rows
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
			return pitchB(width, border, format, target) * ((height + 11) / 12);  // Pitch computed per 12 rows
		case FORMAT_ATI1:
		case FORMAT_ATI2:
		default:
			return pitchB(width, border, format, target) * height;   // Pitch computed per row
		}
	}

	int Surface::sliceP(int width, int height, int border, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ? sliceB(width, height, border, format, target) / B : 0;
	}

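	// Converts the source buffer into the destination buffer, decoding palettized,
	// packed and block-compressed source formats; all other formats take the
	// genericUpdate() path.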
sliceB(width, height, border, format, target) / B : 0; 1860 } 1861 update(Buffer & destination,Buffer & source)1862 void Surface::update(Buffer &destination, Buffer &source) 1863 { 1864 // ASSERT(source.lock != LOCK_UNLOCKED); 1865 // ASSERT(destination.lock != LOCK_UNLOCKED); 1866 1867 if(destination.buffer != source.buffer) 1868 { 1869 ASSERT(source.dirty && !destination.dirty); 1870 1871 switch(source.format) 1872 { 1873 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format 1874 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format 1875 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format 1876 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format 1877 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format 1878 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format 1879 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format 1880 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format 1881 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format 1882 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format 1883 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format 1884 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format 1885 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format 1886 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format 1887 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format 1888 case FORMAT_ETC1: 1889 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format 1890 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format 1891 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format 1892 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format 1893 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format 1894 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format 1895 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format 1896 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format 1897 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format 1898 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format 1899 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format 1900 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format 1901 case FORMAT_RGBA_ASTC_8x6_KHR: 
decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format 1902 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format 1903 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format 1904 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format 1905 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format 1906 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format 1907 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format 1908 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format 1909 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format 1910 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format 1911 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format 1912 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format 1913 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format 1914 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format 1915 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format 1916 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format 1917 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format 1918 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format 1919 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format 1920 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format 1921 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format 1922 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format 1923 default: genericUpdate(destination, source); break; 1924 } 1925 } 1926 } 1927 genericUpdate(Buffer & destination,Buffer & source)1928 void Surface::genericUpdate(Buffer &destination, Buffer &source) 1929 { 1930 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 1931 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 1932 1933 int depth = min(destination.depth, source.depth); 1934 int height = min(destination.height, source.height); 1935 int width = min(destination.width, source.width); 1936 int rowBytes = width * source.bytes; 1937 1938 for(int z = 0; z < depth; 
z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < height; y++)
		{
			if(source.format == destination.format)
			{
				memcpy(destinationRow, sourceRow, rowBytes);
			}
			else
			{
				unsigned char *sourceElement = sourceRow;
				unsigned char *destinationElement = destinationRow;

				for(int x = 0; x < width; x++)
				{
					Color<float> color = source.read(sourceElement);
					destination.write(destinationElement, color);

					sourceElement += source.bytes;
					destinationElement += destination.bytes;
				}
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}

	source.unlockRect();
	destination.unlockRect();
}

void Surface::decodeR8G8B8(Buffer &destination, Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
	unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);

	int depth = min(destination.depth, source.depth);
	int height = min(destination.height, source.height);
	int width = min(destination.width, source.width);

	for(int z = 0; z < depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < height; y++)
		{
			unsigned char *sourceElement = sourceRow;
			unsigned char *destinationElement = destinationRow;

			for(int x = 0; x < width; x++)
			{
				// Repack the byte-ordered B, G, R components into a 32-bit word with opaque alpha
				unsigned int b = sourceElement[0];
				unsigned int g = sourceElement[1];
				unsigned int r = sourceElement[2];

				*(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);

				sourceElement += source.bytes;
				destinationElement += destination.bytes;
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}

	source.unlockRect();
	destination.unlockRect();
}

void Surface::decodeX1R5G5B5(Buffer &destination, Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY);
	unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE);

	int depth = min(destination.depth, source.depth);
	int height = min(destination.height, source.height);
	int width = min(destination.width, source.width);

	for(int z = 0; z < depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < height; y++)
		{
			unsigned char *sourceElement = sourceRow;
			unsigned char *destinationElement = destinationRow;

			for(int x = 0; x < width; x++)
			{
				unsigned int xrgb = *(unsigned short*)sourceElement;

				// Fixed-point expansion of each 5-bit channel to 8 bits with rounding,
				// leaving the result already shifted into its A8R8G8B8 position.
				unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
				unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
				unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);

				*(unsigned int*)destinationElement = 0xFF000000 | r | g | b;

				sourceElement += source.bytes;
2049 destinationElement += destination.bytes; 2050 } 2051 2052 sourceRow += source.pitchB; 2053 destinationRow += destination.pitchB; 2054 } 2055 2056 sourceSlice += source.sliceB; 2057 destinationSlice += destination.sliceB; 2058 } 2059 2060 source.unlockRect(); 2061 destination.unlockRect(); 2062 } 2063 decodeA1R5G5B5(Buffer & destination,Buffer & source)2064 void Surface::decodeA1R5G5B5(Buffer &destination, Buffer &source) 2065 { 2066 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2067 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2068 2069 int depth = min(destination.depth, source.depth); 2070 int height = min(destination.height, source.height); 2071 int width = min(destination.width, source.width); 2072 2073 for(int z = 0; z < depth; z++) 2074 { 2075 unsigned char *sourceRow = sourceSlice; 2076 unsigned char *destinationRow = destinationSlice; 2077 2078 for(int y = 0; y < height; y++) 2079 { 2080 unsigned char *sourceElement = sourceRow; 2081 unsigned char *destinationElement = destinationRow; 2082 2083 for(int x = 0; x < width; x++) 2084 { 2085 unsigned int argb = *(unsigned short*)sourceElement; 2086 2087 unsigned int a = (argb & 0x8000) * 130560; 2088 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 2089 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 2090 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8); 2091 2092 *(unsigned int*)destinationElement = a | r | g | b; 2093 2094 sourceElement += source.bytes; 2095 destinationElement += destination.bytes; 2096 } 2097 2098 sourceRow += source.pitchB; 2099 destinationRow += destination.pitchB; 2100 } 2101 2102 sourceSlice += source.sliceB; 2103 destinationSlice += destination.sliceB; 2104 } 2105 2106 source.unlockRect(); 2107 destination.unlockRect(); 2108 } 2109 decodeX4R4G4B4(Buffer & destination,Buffer & source)2110 void Surface::decodeX4R4G4B4(Buffer &destination, Buffer &source) 2111 { 2112 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2113 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2114 2115 int depth = min(destination.depth, source.depth); 2116 int height = min(destination.height, source.height); 2117 int width = min(destination.width, source.width); 2118 2119 for(int z = 0; z < depth; z++) 2120 { 2121 unsigned char *sourceRow = sourceSlice; 2122 unsigned char *destinationRow = destinationSlice; 2123 2124 for(int y = 0; y < height; y++) 2125 { 2126 unsigned char *sourceElement = sourceRow; 2127 unsigned char *destinationElement = destinationRow; 2128 2129 for(int x = 0; x < width; x++) 2130 { 2131 unsigned int xrgb = *(unsigned short*)sourceElement; 2132 2133 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000; 2134 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00; 2135 unsigned int b = (xrgb & 0x000F) * 0x00000011; 2136 2137 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 2138 2139 sourceElement += source.bytes; 2140 destinationElement += destination.bytes; 2141 } 2142 2143 sourceRow += source.pitchB; 2144 destinationRow += destination.pitchB; 2145 } 2146 2147 sourceSlice += source.sliceB; 2148 destinationSlice += destination.sliceB; 2149 } 2150 2151 source.unlockRect(); 2152 destination.unlockRect(); 2153 } 2154 decodeA4R4G4B4(Buffer & destination,Buffer & source)2155 void Surface::decodeA4R4G4B4(Buffer &destination, Buffer &source) 2156 { 2157 
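		// Each 4-bit channel is expanded to 8 bits by multiplying the nibble by 0x11
		// (0xN -> 0xNN); the additional power-of-16 factor in each multiplier places the
		// expanded value directly in its A8R8G8B8 byte position.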
unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2158 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2159 2160 int depth = min(destination.depth, source.depth); 2161 int height = min(destination.height, source.height); 2162 int width = min(destination.width, source.width); 2163 2164 for(int z = 0; z < depth; z++) 2165 { 2166 unsigned char *sourceRow = sourceSlice; 2167 unsigned char *destinationRow = destinationSlice; 2168 2169 for(int y = 0; y < height; y++) 2170 { 2171 unsigned char *sourceElement = sourceRow; 2172 unsigned char *destinationElement = destinationRow; 2173 2174 for(int x = 0; x < width; x++) 2175 { 2176 unsigned int argb = *(unsigned short*)sourceElement; 2177 2178 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000; 2179 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000; 2180 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00; 2181 unsigned int b = (argb & 0x000F) * 0x00000011; 2182 2183 *(unsigned int*)destinationElement = a | r | g | b; 2184 2185 sourceElement += source.bytes; 2186 destinationElement += destination.bytes; 2187 } 2188 2189 sourceRow += source.pitchB; 2190 destinationRow += destination.pitchB; 2191 } 2192 2193 sourceSlice += source.sliceB; 2194 destinationSlice += destination.sliceB; 2195 } 2196 2197 source.unlockRect(); 2198 destination.unlockRect(); 2199 } 2200 decodeP8(Buffer & destination,Buffer & source)2201 void Surface::decodeP8(Buffer &destination, Buffer &source) 2202 { 2203 unsigned char *sourceSlice = (unsigned char*)source.lockRect(0, 0, 0, sw::LOCK_READONLY); 2204 unsigned char *destinationSlice = (unsigned char*)destination.lockRect(0, 0, 0, sw::LOCK_UPDATE); 2205 2206 int depth = min(destination.depth, source.depth); 2207 int height = min(destination.height, source.height); 2208 int width = min(destination.width, source.width); 2209 2210 for(int z = 0; z < depth; z++) 2211 { 2212 unsigned char *sourceRow = sourceSlice; 2213 unsigned char *destinationRow = destinationSlice; 2214 2215 for(int y = 0; y < height; y++) 2216 { 2217 unsigned char *sourceElement = sourceRow; 2218 unsigned char *destinationElement = destinationRow; 2219 2220 for(int x = 0; x < width; x++) 2221 { 2222 unsigned int abgr = palette[*(unsigned char*)sourceElement]; 2223 2224 unsigned int r = (abgr & 0x000000FF) << 16; 2225 unsigned int g = (abgr & 0x0000FF00) << 0; 2226 unsigned int b = (abgr & 0x00FF0000) >> 16; 2227 unsigned int a = (abgr & 0xFF000000) >> 0; 2228 2229 *(unsigned int*)destinationElement = a | r | g | b; 2230 2231 sourceElement += source.bytes; 2232 destinationElement += destination.bytes; 2233 } 2234 2235 sourceRow += source.pitchB; 2236 destinationRow += destination.pitchB; 2237 } 2238 2239 sourceSlice += source.sliceB; 2240 destinationSlice += destination.sliceB; 2241 } 2242 2243 source.unlockRect(); 2244 destination.unlockRect(); 2245 } 2246 decodeDXT1(Buffer & internal,Buffer & external)2247 void Surface::decodeDXT1(Buffer &internal, Buffer &external) 2248 { 2249 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2250 const DXT1 *source = (const DXT1*)external.lockRect(0, 0, 0, LOCK_READONLY); 2251 2252 for(int z = 0; z < external.depth; z++) 2253 { 2254 unsigned int *dest = destSlice; 2255 2256 for(int y = 0; y < external.height; y += 4) 2257 { 2258 for(int x = 0; x < external.width; x += 4) 2259 { 2260 Color<byte> c[4]; 2261 2262 c[0] = source->c0; 2263 c[1] = source->c1; 2264 2265 if(source->c0 > 
source->c1) // No transparency 2266 { 2267 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2268 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2269 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2270 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2271 c[2].a = 0xFF; 2272 2273 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2274 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2275 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2276 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2277 c[3].a = 0xFF; 2278 } 2279 else // c3 transparent 2280 { 2281 // c2 = 1 / 2 * c0 + 1 / 2 * c1 2282 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2); 2283 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2); 2284 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2); 2285 c[2].a = 0xFF; 2286 2287 c[3].r = 0; 2288 c[3].g = 0; 2289 c[3].b = 0; 2290 c[3].a = 0; 2291 } 2292 2293 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2294 { 2295 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2296 { 2297 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4]; 2298 } 2299 } 2300 2301 source++; 2302 } 2303 } 2304 2305 (byte*&)destSlice += internal.sliceB; 2306 } 2307 2308 external.unlockRect(); 2309 internal.unlockRect(); 2310 } 2311 decodeDXT3(Buffer & internal,Buffer & external)2312 void Surface::decodeDXT3(Buffer &internal, Buffer &external) 2313 { 2314 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2315 const DXT3 *source = (const DXT3*)external.lockRect(0, 0, 0, LOCK_READONLY); 2316 2317 for(int z = 0; z < external.depth; z++) 2318 { 2319 unsigned int *dest = destSlice; 2320 2321 for(int y = 0; y < external.height; y += 4) 2322 { 2323 for(int x = 0; x < external.width; x += 4) 2324 { 2325 Color<byte> c[4]; 2326 2327 c[0] = source->c0; 2328 c[1] = source->c1; 2329 2330 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2331 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2332 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2333 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2334 2335 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2336 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2337 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2338 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2339 2340 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2341 { 2342 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2343 { 2344 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F; 2345 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24)); 2346 2347 dest[(x + i) + (y + j) * internal.width] = color; 2348 } 2349 } 2350 2351 source++; 2352 } 2353 } 2354 2355 (byte*&)destSlice += internal.sliceB; 2356 } 2357 2358 external.unlockRect(); 2359 internal.unlockRect(); 2360 } 2361 decodeDXT5(Buffer & internal,Buffer & external)2362 void Surface::decodeDXT5(Buffer &internal, Buffer &external) 2363 { 2364 unsigned int *destSlice = (unsigned int*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2365 const DXT5 *source = (const DXT5*)external.lockRect(0, 0, 0, LOCK_READONLY); 2366 2367 for(int z = 0; z < external.depth; z++) 2368 { 2369 unsigned int *dest = destSlice; 2370 2371 for(int y = 0; y < external.height; y += 4) 2372 { 2373 for(int x = 0; x < external.width; x += 4) 2374 { 2375 Color<byte> c[4]; 2376 2377 c[0] = source->c0; 2378 c[1] = source->c1; 2379 2380 // c2 = 2 / 3 * c0 + 1 / 3 * c1 
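					// (the +1 in the numerators makes the integer division round to nearest instead of truncating)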
2381 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2382 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2383 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2384 2385 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2386 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2387 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2388 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2389 2390 byte a[8]; 2391 2392 a[0] = source->a0; 2393 a[1] = source->a1; 2394 2395 if(a[0] > a[1]) 2396 { 2397 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7); 2398 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7); 2399 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7); 2400 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7); 2401 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7); 2402 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7); 2403 } 2404 else 2405 { 2406 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5); 2407 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5); 2408 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5); 2409 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5); 2410 a[6] = 0; 2411 a[7] = 0xFF; 2412 } 2413 2414 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2415 { 2416 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2417 { 2418 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24; 2419 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha; 2420 2421 dest[(x + i) + (y + j) * internal.width] = color; 2422 } 2423 } 2424 2425 source++; 2426 } 2427 } 2428 2429 (byte*&)destSlice += internal.sliceB; 2430 } 2431 2432 external.unlockRect(); 2433 internal.unlockRect(); 2434 } 2435 decodeATI1(Buffer & internal,Buffer & external)2436 void Surface::decodeATI1(Buffer &internal, Buffer &external) 2437 { 2438 byte *destSlice = (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2439 const ATI1 *source = (const ATI1*)external.lockRect(0, 0, 0, LOCK_READONLY); 2440 2441 for(int z = 0; z < external.depth; z++) 2442 { 2443 byte *dest = destSlice; 2444 2445 for(int y = 0; y < external.height; y += 4) 2446 { 2447 for(int x = 0; x < external.width; x += 4) 2448 { 2449 byte r[8]; 2450 2451 r[0] = source->r0; 2452 r[1] = source->r1; 2453 2454 if(r[0] > r[1]) 2455 { 2456 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7); 2457 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7); 2458 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7); 2459 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7); 2460 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7); 2461 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7); 2462 } 2463 else 2464 { 2465 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5); 2466 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5); 2467 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5); 2468 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5); 2469 r[6] = 0; 2470 r[7] = 0xFF; 2471 } 2472 2473 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2474 { 2475 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2476 { 2477 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8]; 2478 } 2479 } 2480 2481 source++; 2482 } 2483 } 2484 2485 destSlice += internal.sliceB; 2486 } 2487 2488 external.unlockRect(); 2489 internal.unlockRect(); 2490 } 2491 decodeATI2(Buffer & internal,Buffer & external)2492 
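	// ATI2 (also known as 3Dc / BC5) stores two independently encoded 8-bit channels per
	// 4x4 block, each using the same 8-entry interpolation scheme as the DXT5 alpha block.
	// The blocks are expanded below into two-channel 16-bit texels, with the X channel in
	// the low byte and the Y channel in the high byte.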
void Surface::decodeATI2(Buffer &internal, Buffer &external) 2493 { 2494 word *destSlice = (word*)internal.lockRect(0, 0, 0, LOCK_UPDATE); 2495 const ATI2 *source = (const ATI2*)external.lockRect(0, 0, 0, LOCK_READONLY); 2496 2497 for(int z = 0; z < external.depth; z++) 2498 { 2499 word *dest = destSlice; 2500 2501 for(int y = 0; y < external.height; y += 4) 2502 { 2503 for(int x = 0; x < external.width; x += 4) 2504 { 2505 byte X[8]; 2506 2507 X[0] = source->x0; 2508 X[1] = source->x1; 2509 2510 if(X[0] > X[1]) 2511 { 2512 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7); 2513 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7); 2514 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7); 2515 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7); 2516 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7); 2517 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7); 2518 } 2519 else 2520 { 2521 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5); 2522 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5); 2523 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5); 2524 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5); 2525 X[6] = 0; 2526 X[7] = 0xFF; 2527 } 2528 2529 byte Y[8]; 2530 2531 Y[0] = source->y0; 2532 Y[1] = source->y1; 2533 2534 if(Y[0] > Y[1]) 2535 { 2536 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7); 2537 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7); 2538 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7); 2539 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7); 2540 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7); 2541 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7); 2542 } 2543 else 2544 { 2545 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5); 2546 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5); 2547 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5); 2548 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5); 2549 Y[6] = 0; 2550 Y[7] = 0xFF; 2551 } 2552 2553 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2554 { 2555 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2556 { 2557 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8]; 2558 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8]; 2559 2560 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r; 2561 } 2562 } 2563 2564 source++; 2565 } 2566 } 2567 2568 (byte*&)destSlice += internal.sliceB; 2569 } 2570 2571 external.unlockRect(); 2572 internal.unlockRect(); 2573 } 2574 decodeETC2(Buffer & internal,Buffer & external,int nbAlphaBits,bool isSRGB)2575 void Surface::decodeETC2(Buffer &internal, Buffer &external, int nbAlphaBits, bool isSRGB) 2576 { 2577 ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), (byte*)internal.lockRect(0, 0, 0, LOCK_UPDATE), external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2578 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? 
ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));
	external.unlockRect();
	internal.unlockRect();

	if(isSRGB)
	{
		// Lazily built 256-entry sRGB-to-linear lookup table, shared across calls
		static byte sRGBtoLinearTable[256];
		static bool sRGBtoLinearTableDirty = true;
		if(sRGBtoLinearTableDirty)
		{
			for(int i = 0; i < 256; i++)
			{
				sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
			}
			sRGBtoLinearTableDirty = false;
		}

		// Perform sRGB conversion in place after decoding
		byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
		for(int y = 0; y < internal.height; y++)
		{
			byte *srcRow = src + y * internal.pitchB;
			for(int x = 0; x < internal.width; x++)
			{
				byte *srcPix = srcRow + x * internal.bytes;
				for(int i = 0; i < 3; i++)
				{
					srcPix[i] = sRGBtoLinearTable[srcPix[i]];
				}
			}
		}
		internal.unlockRect();
	}
}

void Surface::decodeEAC(Buffer &internal, Buffer &external, int nbChannels, bool isSigned)
{
	ASSERT(nbChannels == 1 || nbChannels == 2);

	byte *src = (byte*)internal.lockRect(0, 0, 0, LOCK_READWRITE);
	ETC_Decoder::Decode((const byte*)external.lockRect(0, 0, 0, LOCK_READONLY), src, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
		(nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));
	external.unlockRect();

	// FIXME: We convert EAC data to float, until signed short internal formats are supported
	// This code can be removed if ETC2 images are decoded to internal 16 bit signed R/RG formats
	const float normalization = isSigned ?
(1.0f / (8.0f * 127.875f)) : (1.0f / (8.0f * 255.875f)); 2625 for(int y = 0; y < internal.height; y++) 2626 { 2627 byte* srcRow = src + y * internal.pitchB; 2628 for(int x = internal.width - 1; x >= 0; x--) 2629 { 2630 int* srcPix = reinterpret_cast<int*>(srcRow + x * internal.bytes); 2631 float* dstPix = reinterpret_cast<float*>(srcPix); 2632 for(int c = nbChannels - 1; c >= 0; c--) 2633 { 2634 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f); 2635 } 2636 } 2637 } 2638 2639 internal.unlockRect(); 2640 } 2641 decodeASTC(Buffer & internal,Buffer & external,int xBlockSize,int yBlockSize,int zBlockSize,bool isSRGB)2642 void Surface::decodeASTC(Buffer &internal, Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB) 2643 { 2644 } 2645 size(int width,int height,int depth,int border,int samples,Format format)2646 unsigned int Surface::size(int width, int height, int depth, int border, int samples, Format format) 2647 { 2648 width += 2 * border; 2649 height += 2 * border; 2650 2651 // Dimensions rounded up to multiples of 4, used for compressed formats 2652 int width4 = align(width, 4); 2653 int height4 = align(height, 4); 2654 2655 switch(format) 2656 { 2657 case FORMAT_DXT1: 2658 case FORMAT_ATI1: 2659 case FORMAT_ETC1: 2660 case FORMAT_R11_EAC: 2661 case FORMAT_SIGNED_R11_EAC: 2662 case FORMAT_RGB8_ETC2: 2663 case FORMAT_SRGB8_ETC2: 2664 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2665 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2666 return width4 * height4 * depth / 2; 2667 case FORMAT_DXT3: 2668 case FORMAT_DXT5: 2669 case FORMAT_ATI2: 2670 case FORMAT_RG11_EAC: 2671 case FORMAT_SIGNED_RG11_EAC: 2672 case FORMAT_RGBA8_ETC2_EAC: 2673 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2674 case FORMAT_RGBA_ASTC_4x4_KHR: 2675 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2676 return width4 * height4 * depth; 2677 case FORMAT_RGBA_ASTC_5x4_KHR: 2678 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2679 return align(width, 5) * height4 * depth; 2680 case FORMAT_RGBA_ASTC_5x5_KHR: 2681 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2682 return align(width, 5) * align(height, 5) * depth; 2683 case FORMAT_RGBA_ASTC_6x5_KHR: 2684 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2685 return align(width, 6) * align(height, 5) * depth; 2686 case FORMAT_RGBA_ASTC_6x6_KHR: 2687 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2688 return align(width, 6) * align(height, 6) * depth; 2689 case FORMAT_RGBA_ASTC_8x5_KHR: 2690 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2691 return align(width, 8) * align(height, 5) * depth; 2692 case FORMAT_RGBA_ASTC_8x6_KHR: 2693 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2694 return align(width, 8) * align(height, 6) * depth; 2695 case FORMAT_RGBA_ASTC_8x8_KHR: 2696 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2697 return align(width, 8) * align(height, 8) * depth; 2698 case FORMAT_RGBA_ASTC_10x5_KHR: 2699 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2700 return align(width, 10) * align(height, 5) * depth; 2701 case FORMAT_RGBA_ASTC_10x6_KHR: 2702 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2703 return align(width, 10) * align(height, 6) * depth; 2704 case FORMAT_RGBA_ASTC_10x8_KHR: 2705 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2706 return align(width, 10) * align(height, 8) * depth; 2707 case FORMAT_RGBA_ASTC_10x10_KHR: 2708 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2709 return align(width, 10) * align(height, 10) * depth; 2710 case FORMAT_RGBA_ASTC_12x10_KHR: 2711 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2712 return align(width, 12) * align(height, 10) * depth; 2713 case FORMAT_RGBA_ASTC_12x12_KHR: 2714 
case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2715 return align(width, 12) * align(height, 12) * depth; 2716 case FORMAT_YV12_BT601: 2717 case FORMAT_YV12_BT709: 2718 case FORMAT_YV12_JFIF: 2719 { 2720 unsigned int YStride = align(width, 16); 2721 unsigned int YSize = YStride * height; 2722 unsigned int CStride = align(YStride / 2, 16); 2723 unsigned int CSize = CStride * height / 2; 2724 2725 return YSize + 2 * CSize; 2726 } 2727 default: 2728 return bytes(format) * width * height * depth * samples; 2729 } 2730 } 2731 isStencil(Format format)2732 bool Surface::isStencil(Format format) 2733 { 2734 switch(format) 2735 { 2736 case FORMAT_D32: 2737 case FORMAT_D16: 2738 case FORMAT_D24X8: 2739 case FORMAT_D32F: 2740 case FORMAT_D32F_COMPLEMENTARY: 2741 case FORMAT_D32F_LOCKABLE: 2742 case FORMAT_D32F_SHADOW: 2743 return false; 2744 case FORMAT_D24S8: 2745 case FORMAT_D24FS8: 2746 case FORMAT_S8: 2747 case FORMAT_DF24S8: 2748 case FORMAT_DF16S8: 2749 case FORMAT_D32FS8_TEXTURE: 2750 case FORMAT_D32FS8_SHADOW: 2751 case FORMAT_D32FS8: 2752 case FORMAT_D32FS8_COMPLEMENTARY: 2753 case FORMAT_INTZ: 2754 return true; 2755 default: 2756 return false; 2757 } 2758 } 2759 isDepth(Format format)2760 bool Surface::isDepth(Format format) 2761 { 2762 switch(format) 2763 { 2764 case FORMAT_D32: 2765 case FORMAT_D16: 2766 case FORMAT_D24X8: 2767 case FORMAT_D24S8: 2768 case FORMAT_D24FS8: 2769 case FORMAT_D32F: 2770 case FORMAT_D32FS8: 2771 case FORMAT_D32F_COMPLEMENTARY: 2772 case FORMAT_D32FS8_COMPLEMENTARY: 2773 case FORMAT_D32F_LOCKABLE: 2774 case FORMAT_DF24S8: 2775 case FORMAT_DF16S8: 2776 case FORMAT_D32FS8_TEXTURE: 2777 case FORMAT_D32F_SHADOW: 2778 case FORMAT_D32FS8_SHADOW: 2779 case FORMAT_INTZ: 2780 return true; 2781 case FORMAT_S8: 2782 return false; 2783 default: 2784 return false; 2785 } 2786 } 2787 hasQuadLayout(Format format)2788 bool Surface::hasQuadLayout(Format format) 2789 { 2790 switch(format) 2791 { 2792 case FORMAT_D32: 2793 case FORMAT_D16: 2794 case FORMAT_D24X8: 2795 case FORMAT_D24S8: 2796 case FORMAT_D24FS8: 2797 case FORMAT_D32F: 2798 case FORMAT_D32FS8: 2799 case FORMAT_D32F_COMPLEMENTARY: 2800 case FORMAT_D32FS8_COMPLEMENTARY: 2801 case FORMAT_DF24S8: 2802 case FORMAT_DF16S8: 2803 case FORMAT_INTZ: 2804 case FORMAT_S8: 2805 case FORMAT_A8G8R8B8Q: 2806 case FORMAT_X8G8R8B8Q: 2807 return true; 2808 case FORMAT_D32F_LOCKABLE: 2809 case FORMAT_D32FS8_TEXTURE: 2810 case FORMAT_D32F_SHADOW: 2811 case FORMAT_D32FS8_SHADOW: 2812 default: 2813 break; 2814 } 2815 2816 return false; 2817 } 2818 isPalette(Format format)2819 bool Surface::isPalette(Format format) 2820 { 2821 switch(format) 2822 { 2823 case FORMAT_P8: 2824 case FORMAT_A8P8: 2825 return true; 2826 default: 2827 return false; 2828 } 2829 } 2830 isFloatFormat(Format format)2831 bool Surface::isFloatFormat(Format format) 2832 { 2833 switch(format) 2834 { 2835 case FORMAT_R5G6B5: 2836 case FORMAT_R8G8B8: 2837 case FORMAT_B8G8R8: 2838 case FORMAT_X8R8G8B8: 2839 case FORMAT_X8B8G8R8I: 2840 case FORMAT_X8B8G8R8: 2841 case FORMAT_A8R8G8B8: 2842 case FORMAT_SRGB8_X8: 2843 case FORMAT_SRGB8_A8: 2844 case FORMAT_A8B8G8R8I: 2845 case FORMAT_R8UI: 2846 case FORMAT_G8R8UI: 2847 case FORMAT_X8B8G8R8UI: 2848 case FORMAT_A8B8G8R8UI: 2849 case FORMAT_A8B8G8R8: 2850 case FORMAT_G8R8I: 2851 case FORMAT_G8R8: 2852 case FORMAT_A2B10G10R10: 2853 case FORMAT_A2B10G10R10UI: 2854 case FORMAT_R8_SNORM: 2855 case FORMAT_G8R8_SNORM: 2856 case FORMAT_X8B8G8R8_SNORM: 2857 case FORMAT_A8B8G8R8_SNORM: 2858 case FORMAT_R16I: 2859 case FORMAT_R16UI: 2860 case 
FORMAT_G16R16I: 2861 case FORMAT_G16R16UI: 2862 case FORMAT_G16R16: 2863 case FORMAT_X16B16G16R16I: 2864 case FORMAT_X16B16G16R16UI: 2865 case FORMAT_A16B16G16R16I: 2866 case FORMAT_A16B16G16R16UI: 2867 case FORMAT_A16B16G16R16: 2868 case FORMAT_V8U8: 2869 case FORMAT_Q8W8V8U8: 2870 case FORMAT_X8L8V8U8: 2871 case FORMAT_V16U16: 2872 case FORMAT_A16W16V16U16: 2873 case FORMAT_Q16W16V16U16: 2874 case FORMAT_A8: 2875 case FORMAT_R8I: 2876 case FORMAT_R8: 2877 case FORMAT_S8: 2878 case FORMAT_L8: 2879 case FORMAT_L16: 2880 case FORMAT_A8L8: 2881 case FORMAT_YV12_BT601: 2882 case FORMAT_YV12_BT709: 2883 case FORMAT_YV12_JFIF: 2884 case FORMAT_R32I: 2885 case FORMAT_R32UI: 2886 case FORMAT_G32R32I: 2887 case FORMAT_G32R32UI: 2888 case FORMAT_X32B32G32R32I: 2889 case FORMAT_X32B32G32R32UI: 2890 case FORMAT_A32B32G32R32I: 2891 case FORMAT_A32B32G32R32UI: 2892 return false; 2893 case FORMAT_R16F: 2894 case FORMAT_G16R16F: 2895 case FORMAT_B16G16R16F: 2896 case FORMAT_X16B16G16R16F: 2897 case FORMAT_A16B16G16R16F: 2898 case FORMAT_X16B16G16R16F_UNSIGNED: 2899 case FORMAT_R32F: 2900 case FORMAT_G32R32F: 2901 case FORMAT_B32G32R32F: 2902 case FORMAT_X32B32G32R32F: 2903 case FORMAT_A32B32G32R32F: 2904 case FORMAT_X32B32G32R32F_UNSIGNED: 2905 case FORMAT_D32F: 2906 case FORMAT_D32FS8: 2907 case FORMAT_D32F_COMPLEMENTARY: 2908 case FORMAT_D32FS8_COMPLEMENTARY: 2909 case FORMAT_D32F_LOCKABLE: 2910 case FORMAT_D32FS8_TEXTURE: 2911 case FORMAT_D32F_SHADOW: 2912 case FORMAT_D32FS8_SHADOW: 2913 case FORMAT_L16F: 2914 case FORMAT_A16L16F: 2915 case FORMAT_L32F: 2916 case FORMAT_A32L32F: 2917 return true; 2918 default: 2919 ASSERT(false); 2920 } 2921 2922 return false; 2923 } 2924 isUnsignedComponent(Format format,int component)2925 bool Surface::isUnsignedComponent(Format format, int component) 2926 { 2927 switch(format) 2928 { 2929 case FORMAT_NULL: 2930 case FORMAT_R5G6B5: 2931 case FORMAT_R8G8B8: 2932 case FORMAT_B8G8R8: 2933 case FORMAT_X8R8G8B8: 2934 case FORMAT_X8B8G8R8: 2935 case FORMAT_A8R8G8B8: 2936 case FORMAT_A8B8G8R8: 2937 case FORMAT_SRGB8_X8: 2938 case FORMAT_SRGB8_A8: 2939 case FORMAT_G8R8: 2940 case FORMAT_A2B10G10R10: 2941 case FORMAT_A2B10G10R10UI: 2942 case FORMAT_R16UI: 2943 case FORMAT_G16R16: 2944 case FORMAT_G16R16UI: 2945 case FORMAT_X16B16G16R16UI: 2946 case FORMAT_A16B16G16R16: 2947 case FORMAT_A16B16G16R16UI: 2948 case FORMAT_R32UI: 2949 case FORMAT_G32R32UI: 2950 case FORMAT_X32B32G32R32UI: 2951 case FORMAT_A32B32G32R32UI: 2952 case FORMAT_X32B32G32R32F_UNSIGNED: 2953 case FORMAT_R8UI: 2954 case FORMAT_G8R8UI: 2955 case FORMAT_X8B8G8R8UI: 2956 case FORMAT_A8B8G8R8UI: 2957 case FORMAT_D32F: 2958 case FORMAT_D32FS8: 2959 case FORMAT_D32F_COMPLEMENTARY: 2960 case FORMAT_D32FS8_COMPLEMENTARY: 2961 case FORMAT_D32F_LOCKABLE: 2962 case FORMAT_D32FS8_TEXTURE: 2963 case FORMAT_D32F_SHADOW: 2964 case FORMAT_D32FS8_SHADOW: 2965 case FORMAT_A8: 2966 case FORMAT_R8: 2967 case FORMAT_L8: 2968 case FORMAT_L16: 2969 case FORMAT_A8L8: 2970 case FORMAT_YV12_BT601: 2971 case FORMAT_YV12_BT709: 2972 case FORMAT_YV12_JFIF: 2973 return true; 2974 case FORMAT_A8B8G8R8I: 2975 case FORMAT_A16B16G16R16I: 2976 case FORMAT_A32B32G32R32I: 2977 case FORMAT_A8B8G8R8_SNORM: 2978 case FORMAT_Q8W8V8U8: 2979 case FORMAT_Q16W16V16U16: 2980 case FORMAT_A32B32G32R32F: 2981 return false; 2982 case FORMAT_R32F: 2983 case FORMAT_R8I: 2984 case FORMAT_R16I: 2985 case FORMAT_R32I: 2986 case FORMAT_R8_SNORM: 2987 return component >= 1; 2988 case FORMAT_V8U8: 2989 case FORMAT_X8L8V8U8: 2990 case FORMAT_V16U16: 2991 case 
FORMAT_G32R32F: 2992 case FORMAT_G8R8I: 2993 case FORMAT_G16R16I: 2994 case FORMAT_G32R32I: 2995 case FORMAT_G8R8_SNORM: 2996 return component >= 2; 2997 case FORMAT_A16W16V16U16: 2998 case FORMAT_B32G32R32F: 2999 case FORMAT_X32B32G32R32F: 3000 case FORMAT_X8B8G8R8I: 3001 case FORMAT_X16B16G16R16I: 3002 case FORMAT_X32B32G32R32I: 3003 case FORMAT_X8B8G8R8_SNORM: 3004 return component >= 3; 3005 default: 3006 ASSERT(false); 3007 } 3008 3009 return false; 3010 } 3011 isSRGBreadable(Format format)3012 bool Surface::isSRGBreadable(Format format) 3013 { 3014 // Keep in sync with Capabilities::isSRGBreadable 3015 switch(format) 3016 { 3017 case FORMAT_L8: 3018 case FORMAT_A8L8: 3019 case FORMAT_R8G8B8: 3020 case FORMAT_A8R8G8B8: 3021 case FORMAT_X8R8G8B8: 3022 case FORMAT_A8B8G8R8: 3023 case FORMAT_X8B8G8R8: 3024 case FORMAT_SRGB8_X8: 3025 case FORMAT_SRGB8_A8: 3026 case FORMAT_R5G6B5: 3027 case FORMAT_X1R5G5B5: 3028 case FORMAT_A1R5G5B5: 3029 case FORMAT_A4R4G4B4: 3030 case FORMAT_DXT1: 3031 case FORMAT_DXT3: 3032 case FORMAT_DXT5: 3033 case FORMAT_ATI1: 3034 case FORMAT_ATI2: 3035 return true; 3036 default: 3037 return false; 3038 } 3039 } 3040 isSRGBwritable(Format format)3041 bool Surface::isSRGBwritable(Format format) 3042 { 3043 // Keep in sync with Capabilities::isSRGBwritable 3044 switch(format) 3045 { 3046 case FORMAT_NULL: 3047 case FORMAT_A8R8G8B8: 3048 case FORMAT_X8R8G8B8: 3049 case FORMAT_A8B8G8R8: 3050 case FORMAT_X8B8G8R8: 3051 case FORMAT_SRGB8_X8: 3052 case FORMAT_SRGB8_A8: 3053 case FORMAT_R5G6B5: 3054 return true; 3055 default: 3056 return false; 3057 } 3058 } 3059 isSRGBformat(Format format)3060 bool Surface::isSRGBformat(Format format) 3061 { 3062 switch(format) 3063 { 3064 case FORMAT_SRGB8_X8: 3065 case FORMAT_SRGB8_A8: 3066 return true; 3067 default: 3068 return false; 3069 } 3070 } 3071 isCompressed(Format format)3072 bool Surface::isCompressed(Format format) 3073 { 3074 switch(format) 3075 { 3076 case FORMAT_DXT1: 3077 case FORMAT_DXT3: 3078 case FORMAT_DXT5: 3079 case FORMAT_ATI1: 3080 case FORMAT_ATI2: 3081 case FORMAT_ETC1: 3082 case FORMAT_R11_EAC: 3083 case FORMAT_SIGNED_R11_EAC: 3084 case FORMAT_RG11_EAC: 3085 case FORMAT_SIGNED_RG11_EAC: 3086 case FORMAT_RGB8_ETC2: 3087 case FORMAT_SRGB8_ETC2: 3088 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3089 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3090 case FORMAT_RGBA8_ETC2_EAC: 3091 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3092 case FORMAT_RGBA_ASTC_4x4_KHR: 3093 case FORMAT_RGBA_ASTC_5x4_KHR: 3094 case FORMAT_RGBA_ASTC_5x5_KHR: 3095 case FORMAT_RGBA_ASTC_6x5_KHR: 3096 case FORMAT_RGBA_ASTC_6x6_KHR: 3097 case FORMAT_RGBA_ASTC_8x5_KHR: 3098 case FORMAT_RGBA_ASTC_8x6_KHR: 3099 case FORMAT_RGBA_ASTC_8x8_KHR: 3100 case FORMAT_RGBA_ASTC_10x5_KHR: 3101 case FORMAT_RGBA_ASTC_10x6_KHR: 3102 case FORMAT_RGBA_ASTC_10x8_KHR: 3103 case FORMAT_RGBA_ASTC_10x10_KHR: 3104 case FORMAT_RGBA_ASTC_12x10_KHR: 3105 case FORMAT_RGBA_ASTC_12x12_KHR: 3106 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3107 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3108 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3109 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3110 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3111 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3112 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3113 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3114 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3115 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3116 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3117 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3118 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3119 case 
FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3120 return true; 3121 default: 3122 return false; 3123 } 3124 } 3125 isSignedNonNormalizedInteger(Format format)3126 bool Surface::isSignedNonNormalizedInteger(Format format) 3127 { 3128 switch(format) 3129 { 3130 case FORMAT_A8B8G8R8I: 3131 case FORMAT_X8B8G8R8I: 3132 case FORMAT_G8R8I: 3133 case FORMAT_R8I: 3134 case FORMAT_A16B16G16R16I: 3135 case FORMAT_X16B16G16R16I: 3136 case FORMAT_G16R16I: 3137 case FORMAT_R16I: 3138 case FORMAT_A32B32G32R32I: 3139 case FORMAT_X32B32G32R32I: 3140 case FORMAT_G32R32I: 3141 case FORMAT_R32I: 3142 return true; 3143 default: 3144 return false; 3145 } 3146 } 3147 isUnsignedNonNormalizedInteger(Format format)3148 bool Surface::isUnsignedNonNormalizedInteger(Format format) 3149 { 3150 switch(format) 3151 { 3152 case FORMAT_A8B8G8R8UI: 3153 case FORMAT_X8B8G8R8UI: 3154 case FORMAT_G8R8UI: 3155 case FORMAT_R8UI: 3156 case FORMAT_A16B16G16R16UI: 3157 case FORMAT_X16B16G16R16UI: 3158 case FORMAT_G16R16UI: 3159 case FORMAT_R16UI: 3160 case FORMAT_A32B32G32R32UI: 3161 case FORMAT_X32B32G32R32UI: 3162 case FORMAT_G32R32UI: 3163 case FORMAT_R32UI: 3164 return true; 3165 default: 3166 return false; 3167 } 3168 } 3169 isNonNormalizedInteger(Format format)3170 bool Surface::isNonNormalizedInteger(Format format) 3171 { 3172 return isSignedNonNormalizedInteger(format) || 3173 isUnsignedNonNormalizedInteger(format); 3174 } 3175 isNormalizedInteger(Format format)3176 bool Surface::isNormalizedInteger(Format format) 3177 { 3178 return !isFloatFormat(format) && 3179 !isNonNormalizedInteger(format) && 3180 !isCompressed(format) && 3181 !isDepth(format) && 3182 !isStencil(format); 3183 } 3184 componentCount(Format format)3185 int Surface::componentCount(Format format) 3186 { 3187 switch(format) 3188 { 3189 case FORMAT_R5G6B5: return 3; 3190 case FORMAT_X8R8G8B8: return 3; 3191 case FORMAT_X8B8G8R8I: return 3; 3192 case FORMAT_X8B8G8R8: return 3; 3193 case FORMAT_A8R8G8B8: return 4; 3194 case FORMAT_SRGB8_X8: return 3; 3195 case FORMAT_SRGB8_A8: return 4; 3196 case FORMAT_A8B8G8R8I: return 4; 3197 case FORMAT_A8B8G8R8: return 4; 3198 case FORMAT_G8R8I: return 2; 3199 case FORMAT_G8R8: return 2; 3200 case FORMAT_R8_SNORM: return 1; 3201 case FORMAT_G8R8_SNORM: return 2; 3202 case FORMAT_X8B8G8R8_SNORM:return 3; 3203 case FORMAT_A8B8G8R8_SNORM:return 4; 3204 case FORMAT_R8UI: return 1; 3205 case FORMAT_G8R8UI: return 2; 3206 case FORMAT_X8B8G8R8UI: return 3; 3207 case FORMAT_A8B8G8R8UI: return 4; 3208 case FORMAT_A2B10G10R10: return 4; 3209 case FORMAT_A2B10G10R10UI: return 4; 3210 case FORMAT_G16R16I: return 2; 3211 case FORMAT_G16R16UI: return 2; 3212 case FORMAT_G16R16: return 2; 3213 case FORMAT_G32R32I: return 2; 3214 case FORMAT_G32R32UI: return 2; 3215 case FORMAT_X16B16G16R16I: return 3; 3216 case FORMAT_X16B16G16R16UI: return 3; 3217 case FORMAT_A16B16G16R16I: return 4; 3218 case FORMAT_A16B16G16R16UI: return 4; 3219 case FORMAT_A16B16G16R16: return 4; 3220 case FORMAT_X32B32G32R32I: return 3; 3221 case FORMAT_X32B32G32R32UI: return 3; 3222 case FORMAT_A32B32G32R32I: return 4; 3223 case FORMAT_A32B32G32R32UI: return 4; 3224 case FORMAT_V8U8: return 2; 3225 case FORMAT_Q8W8V8U8: return 4; 3226 case FORMAT_X8L8V8U8: return 3; 3227 case FORMAT_V16U16: return 2; 3228 case FORMAT_A16W16V16U16: return 4; 3229 case FORMAT_Q16W16V16U16: return 4; 3230 case FORMAT_R32F: return 1; 3231 case FORMAT_G32R32F: return 2; 3232 case FORMAT_X32B32G32R32F: return 3; 3233 case FORMAT_A32B32G32R32F: return 4; 3234 case FORMAT_X32B32G32R32F_UNSIGNED: 
return 3; 3235 case FORMAT_D32F: return 1; 3236 case FORMAT_D32FS8: return 1; 3237 case FORMAT_D32F_LOCKABLE: return 1; 3238 case FORMAT_D32FS8_TEXTURE: return 1; 3239 case FORMAT_D32F_SHADOW: return 1; 3240 case FORMAT_D32FS8_SHADOW: return 1; 3241 case FORMAT_A8: return 1; 3242 case FORMAT_R8I: return 1; 3243 case FORMAT_R8: return 1; 3244 case FORMAT_R16I: return 1; 3245 case FORMAT_R16UI: return 1; 3246 case FORMAT_R32I: return 1; 3247 case FORMAT_R32UI: return 1; 3248 case FORMAT_L8: return 1; 3249 case FORMAT_L16: return 1; 3250 case FORMAT_A8L8: return 2; 3251 case FORMAT_YV12_BT601: return 3; 3252 case FORMAT_YV12_BT709: return 3; 3253 case FORMAT_YV12_JFIF: return 3; 3254 default: 3255 ASSERT(false); 3256 } 3257 3258 return 1; 3259 } 3260 allocateBuffer(int width,int height,int depth,int border,int samples,Format format)3261 void *Surface::allocateBuffer(int width, int height, int depth, int border, int samples, Format format) 3262 { 3263 // Render targets require 2x2 quads 3264 int width2 = (width + 1) & ~1; 3265 int height2 = (height + 1) & ~1; 3266 3267 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes, 3268 // and stencil operations also read 8 bytes per four 8-bit stencil values, 3269 // so we have to allocate 4 extra bytes to avoid buffer overruns. 3270 return allocate(size(width2, height2, depth, border, samples, format) + 4); 3271 } 3272 memfill4(void * buffer,int pattern,int bytes)3273 void Surface::memfill4(void *buffer, int pattern, int bytes) 3274 { 3275 while((size_t)buffer & 0x1 && bytes >= 1) 3276 { 3277 *(char*)buffer = (char)pattern; 3278 (char*&)buffer += 1; 3279 bytes -= 1; 3280 } 3281 3282 while((size_t)buffer & 0x3 && bytes >= 2) 3283 { 3284 *(short*)buffer = (short)pattern; 3285 (short*&)buffer += 1; 3286 bytes -= 2; 3287 } 3288 3289 #if defined(__i386__) || defined(__x86_64__) 3290 if(CPUID::supportsSSE()) 3291 { 3292 while((size_t)buffer & 0xF && bytes >= 4) 3293 { 3294 *(int*)buffer = pattern; 3295 (int*&)buffer += 1; 3296 bytes -= 4; 3297 } 3298 3299 __m128 quad = _mm_set_ps1((float&)pattern); 3300 3301 float *pointer = (float*)buffer; 3302 int qxwords = bytes / 64; 3303 bytes -= qxwords * 64; 3304 3305 while(qxwords--) 3306 { 3307 _mm_stream_ps(pointer + 0, quad); 3308 _mm_stream_ps(pointer + 4, quad); 3309 _mm_stream_ps(pointer + 8, quad); 3310 _mm_stream_ps(pointer + 12, quad); 3311 3312 pointer += 16; 3313 } 3314 3315 buffer = pointer; 3316 } 3317 #endif 3318 3319 while(bytes >= 4) 3320 { 3321 *(int*)buffer = (int)pattern; 3322 (int*&)buffer += 1; 3323 bytes -= 4; 3324 } 3325 3326 while(bytes >= 2) 3327 { 3328 *(short*)buffer = (short)pattern; 3329 (short*&)buffer += 1; 3330 bytes -= 2; 3331 } 3332 3333 while(bytes >= 1) 3334 { 3335 *(char*)buffer = (char)pattern; 3336 (char*&)buffer += 1; 3337 bytes -= 1; 3338 } 3339 } 3340 sync()3341 void Surface::sync() 3342 { 3343 resource->lock(EXCLUSIVE); 3344 resource->unlock(); 3345 } 3346 isEntire(const Rect & rect) const3347 bool Surface::isEntire(const Rect& rect) const 3348 { 3349 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1); 3350 } 3351 getRect() const3352 Rect Surface::getRect() const 3353 { 3354 return Rect(0, 0, internal.width, internal.height); 3355 } 3356 clearDepth(float depth,int x0,int y0,int width,int height)3357 void Surface::clearDepth(float depth, int x0, int y0, int width, int height) 3358 { 3359 if(width == 0 || height == 0) return; 3360 3361 // Not overlapping 3362 if(x0 > 
internal.width) return; 3363 if(y0 > internal.height) return; 3364 if(x0 + width < 0) return; 3365 if(y0 + height < 0) return; 3366 3367 // Clip against dimensions 3368 if(x0 < 0) {width += x0; x0 = 0;} 3369 if(x0 + width > internal.width) width = internal.width - x0; 3370 if(y0 < 0) {height += y0; y0 = 0;} 3371 if(y0 + height > internal.height) height = internal.height - y0; 3372 3373 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 3374 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY; 3375 3376 int x1 = x0 + width; 3377 int y1 = y0 + height; 3378 3379 if(!hasQuadLayout(internal.format)) 3380 { 3381 float *target = (float*)lockInternal(x0, y0, 0, lock, PUBLIC); 3382 3383 for(int z = 0; z < internal.samples; z++) 3384 { 3385 float *row = target; 3386 for(int y = y0; y < y1; y++) 3387 { 3388 memfill4(row, (int&)depth, width * sizeof(float)); 3389 row += internal.pitchP; 3390 } 3391 target += internal.sliceP; 3392 } 3393 3394 unlockInternal(); 3395 } 3396 else // Quad layout 3397 { 3398 if(complementaryDepthBuffer) 3399 { 3400 depth = 1 - depth; 3401 } 3402 3403 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC); 3404 3405 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3406 int oddX1 = (x1 & ~1) * 2; 3407 int evenX0 = ((x0 + 1) & ~1) * 2; 3408 int evenBytes = (oddX1 - evenX0) * sizeof(float); 3409 3410 for(int z = 0; z < internal.samples; z++) 3411 { 3412 for(int y = y0; y < y1; y++) 3413 { 3414 float *target = buffer + (y & ~1) * internal.pitchP + (y & 1) * 2; 3415 3416 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 3417 { 3418 if((x0 & 1) != 0) 3419 { 3420 target[oddX0 + 0] = depth; 3421 target[oddX0 + 2] = depth; 3422 } 3423 3424 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4) 3425 // { 3426 // target[x2 + 0] = depth; 3427 // target[x2 + 1] = depth; 3428 // target[x2 + 2] = depth; 3429 // target[x2 + 3] = depth; 3430 // } 3431 3432 // __asm 3433 // { 3434 // movss xmm0, depth 3435 // shufps xmm0, xmm0, 0x00 3436 // 3437 // mov eax, x0 3438 // add eax, 1 3439 // and eax, 0xFFFFFFFE 3440 // cmp eax, x1 3441 // jge qEnd 3442 // 3443 // mov edi, target 3444 // 3445 // qLoop: 3446 // movntps [edi+8*eax], xmm0 3447 // 3448 // add eax, 2 3449 // cmp eax, x1 3450 // jl qLoop 3451 // qEnd: 3452 // } 3453 3454 memfill4(&target[evenX0], (int&)depth, evenBytes); 3455 3456 if((x1 & 1) != 0) 3457 { 3458 target[oddX1 + 0] = depth; 3459 target[oddX1 + 2] = depth; 3460 } 3461 3462 y++; 3463 } 3464 else 3465 { 3466 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3467 { 3468 target[i] = depth; 3469 } 3470 } 3471 } 3472 3473 buffer += internal.sliceP; 3474 } 3475 3476 unlockInternal(); 3477 } 3478 } 3479 clearStencil(unsigned char s,unsigned char mask,int x0,int y0,int width,int height)3480 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3481 { 3482 if(mask == 0 || width == 0 || height == 0) return; 3483 3484 // Not overlapping 3485 if(x0 > internal.width) return; 3486 if(y0 > internal.height) return; 3487 if(x0 + width < 0) return; 3488 if(y0 + height < 0) return; 3489 3490 // Clip against dimensions 3491 if(x0 < 0) {width += x0; x0 = 0;} 3492 if(x0 + width > internal.width) width = internal.width - x0; 3493 if(y0 < 0) {height += y0; y0 = 0;} 3494 if(y0 + height > internal.height) height = internal.height - y0; 3495 3496 int x1 = x0 + width; 3497 int y1 = y0 + height; 3498 3499 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3500 int oddX1 = (x1 & ~1) * 2; 3501 int evenX0 = ((x0 + 
1) & ~1) * 2; 3502 int evenBytes = oddX1 - evenX0; 3503 3504 unsigned char maskedS = s & mask; 3505 unsigned char invMask = ~mask; 3506 unsigned int fill = maskedS; 3507 fill = fill | (fill << 8) | (fill << 16) | (fill << 24); 3508 3509 char *buffer = (char*)lockStencil(0, 0, 0, PUBLIC); 3510 3511 // Stencil buffers are assumed to use quad layout 3512 for(int z = 0; z < stencil.samples; z++) 3513 { 3514 for(int y = y0; y < y1; y++) 3515 { 3516 char *target = buffer + (y & ~1) * stencil.pitchP + (y & 1) * 2; 3517 3518 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once 3519 { 3520 if((x0 & 1) != 0) 3521 { 3522 target[oddX0 + 0] = fill; 3523 target[oddX0 + 2] = fill; 3524 } 3525 3526 memfill4(&target[evenX0], fill, evenBytes); 3527 3528 if((x1 & 1) != 0) 3529 { 3530 target[oddX1 + 0] = fill; 3531 target[oddX1 + 2] = fill; 3532 } 3533 3534 y++; 3535 } 3536 else 3537 { 3538 for(int x = x0; x < x1; x++) 3539 { 3540 int i = (x & ~1) * 2 + (x & 1); 3541 target[i] = maskedS | (target[i] & invMask); 3542 } 3543 } 3544 } 3545 3546 buffer += stencil.sliceP; 3547 } 3548 3549 unlockStencil(); 3550 } 3551 fill(const Color<float> & color,int x0,int y0,int width,int height)3552 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height) 3553 { 3554 unsigned char *row; 3555 Buffer *buffer; 3556 3557 if(internal.dirty) 3558 { 3559 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3560 buffer = &internal; 3561 } 3562 else 3563 { 3564 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3565 buffer = &external; 3566 } 3567 3568 if(buffer->bytes <= 4) 3569 { 3570 int c; 3571 buffer->write(&c, color); 3572 3573 if(buffer->bytes <= 1) c = (c << 8) | c; 3574 if(buffer->bytes <= 2) c = (c << 16) | c; 3575 3576 for(int y = 0; y < height; y++) 3577 { 3578 memfill4(row, c, width * buffer->bytes); 3579 3580 row += buffer->pitchB; 3581 } 3582 } 3583 else // Generic 3584 { 3585 for(int y = 0; y < height; y++) 3586 { 3587 unsigned char *element = row; 3588 3589 for(int x = 0; x < width; x++) 3590 { 3591 buffer->write(element, color); 3592 3593 element += buffer->bytes; 3594 } 3595 3596 row += buffer->pitchB; 3597 } 3598 } 3599 3600 if(buffer == &internal) 3601 { 3602 unlockInternal(); 3603 } 3604 else 3605 { 3606 unlockExternal(); 3607 } 3608 } 3609 copyInternal(const Surface * source,int x,int y,float srcX,float srcY,bool filter)3610 void Surface::copyInternal(const Surface *source, int x, int y, float srcX, float srcY, bool filter) 3611 { 3612 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3613 3614 sw::Color<float> color; 3615 3616 if(!filter) 3617 { 3618 color = source->internal.read((int)srcX, (int)srcY, 0); 3619 } 3620 else // Bilinear filtering 3621 { 3622 color = source->internal.sample(srcX, srcY, 0); 3623 } 3624 3625 internal.write(x, y, color); 3626 } 3627 copyInternal(const Surface * source,int x,int y,int z,float srcX,float srcY,float srcZ,bool filter)3628 void Surface::copyInternal(const Surface *source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter) 3629 { 3630 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3631 3632 sw::Color<float> color; 3633 3634 if(!filter) 3635 { 3636 color = source->internal.read((int)srcX, (int)srcY, int(srcZ)); 3637 } 3638 else // Bilinear filtering 3639 { 3640 color = source->internal.sample(srcX, srcY, srcZ); 3641 } 3642 3643 internal.write(x, y, z, color); 3644 } 3645 
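	// Cube-map faces can carry a one-texel border; copyCubeEdge() fills this face's border
	// row or column with the edge texels of the adjacent face, so that filtered fetches can
	// read across the seam. The corner texels are averaged afterwards by computeCubeCorner().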
copyCubeEdge(Edge dstEdge,Surface * src,Edge srcEdge)3646 void Surface::copyCubeEdge(Edge dstEdge, Surface *src, Edge srcEdge) 3647 { 3648 Surface *dst = this; 3649 3650 // Figure out whether the edges need to be copied in reverse order with respect to one another. 3651 // The copy should be reversed whenever the same edges are contiguous or if we're 3652 // copying top <-> right or bottom <-> left. This is explained by the layout, which is: 3653 // 3654 // | +y | 3655 // | -x | +z | +x | -z | 3656 // | -y | 3657 3658 bool reverse = (srcEdge == dstEdge) || 3659 ((srcEdge == TOP) && (dstEdge == RIGHT)) || 3660 ((srcEdge == RIGHT) && (dstEdge == TOP)) || 3661 ((srcEdge == BOTTOM) && (dstEdge == LEFT)) || 3662 ((srcEdge == LEFT) && (dstEdge == BOTTOM)); 3663 3664 int srcBytes = src->bytes(src->Surface::getInternalFormat()); 3665 int srcPitch = src->getInternalPitchB(); 3666 int dstBytes = dst->bytes(dst->Surface::getInternalFormat()); 3667 int dstPitch = dst->getInternalPitchB(); 3668 3669 int srcW = src->getWidth(); 3670 int srcH = src->getHeight(); 3671 int dstW = dst->getWidth(); 3672 int dstH = dst->getHeight(); 3673 3674 ASSERT(srcW == srcH && dstW == dstH && srcW == dstW && srcBytes == dstBytes); 3675 3676 // Src is expressed in the regular [0, width-1], [0, height-1] space 3677 int srcDelta = ((srcEdge == TOP) || (srcEdge == BOTTOM)) ? srcBytes : srcPitch; 3678 int srcStart = ((srcEdge == BOTTOM) ? srcPitch * (srcH - 1) : ((srcEdge == RIGHT) ? srcBytes * (srcW - 1) : 0)); 3679 3680 // Dst contains borders, so it is expressed in the [-1, width+1], [-1, height+1] space 3681 int dstDelta = (((dstEdge == TOP) || (dstEdge == BOTTOM)) ? dstBytes : dstPitch) * (reverse ? -1 : 1); 3682 int dstStart = ((dstEdge == BOTTOM) ? dstPitch * (dstH + 1) : ((dstEdge == RIGHT) ? dstBytes * (dstW + 1) : 0)) + (reverse ? dstW * -dstDelta : dstDelta); 3683 3684 char *srcBuf = (char*)src->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PRIVATE) + srcStart; 3685 char *dstBuf = (char*)dst->lockInternal(-1, -1, 0, sw::LOCK_READWRITE, sw::PRIVATE) + dstStart; 3686 3687 for(int i = 0; i < srcW; ++i, dstBuf += dstDelta, srcBuf += srcDelta) 3688 { 3689 memcpy(dstBuf, srcBuf, srcBytes); 3690 } 3691 3692 if(dstEdge == LEFT || dstEdge == RIGHT) 3693 { 3694 // TOP and BOTTOM are already set, let's average out the corners 3695 int x0 = (dstEdge == RIGHT) ? dstW : -1; 3696 int y0 = -1; 3697 int x1 = (dstEdge == RIGHT) ? 
dstW - 1 : 0; 3698 int y1 = 0; 3699 dst->computeCubeCorner(x0, y0, x1, y1); 3700 y0 = dstH; 3701 y1 = dstH - 1; 3702 dst->computeCubeCorner(x0, y0, x1, y1); 3703 } 3704 3705 src->unlockInternal(); 3706 dst->unlockInternal(); 3707 } 3708 computeCubeCorner(int x0,int y0,int x1,int y1)3709 void Surface::computeCubeCorner(int x0, int y0, int x1, int y1) 3710 { 3711 ASSERT(internal.lock != LOCK_UNLOCKED); 3712 3713 sw::Color<float> color = internal.read(x0, y1); 3714 color += internal.read(x1, y0); 3715 color += internal.read(x1, y1); 3716 color *= (1.0f / 3.0f); 3717 3718 internal.write(x0, y0, color); 3719 } 3720 hasStencil() const3721 bool Surface::hasStencil() const 3722 { 3723 return isStencil(external.format); 3724 } 3725 hasDepth() const3726 bool Surface::hasDepth() const 3727 { 3728 return isDepth(external.format); 3729 } 3730 hasPalette() const3731 bool Surface::hasPalette() const 3732 { 3733 return isPalette(external.format); 3734 } 3735 isRenderTarget() const3736 bool Surface::isRenderTarget() const 3737 { 3738 return renderTarget; 3739 } 3740 hasDirtyContents() const3741 bool Surface::hasDirtyContents() const 3742 { 3743 return dirtyContents; 3744 } 3745 markContentsClean()3746 void Surface::markContentsClean() 3747 { 3748 dirtyContents = false; 3749 } 3750 getResource()3751 Resource *Surface::getResource() 3752 { 3753 return resource; 3754 } 3755 identicalFormats() const3756 bool Surface::identicalFormats() const 3757 { 3758 return external.format == internal.format && 3759 external.width == internal.width && 3760 external.height == internal.height && 3761 external.depth == internal.depth && 3762 external.pitchB == internal.pitchB && 3763 external.sliceB == internal.sliceB && 3764 external.border == internal.border && 3765 external.samples == internal.samples; 3766 } 3767 selectInternalFormat(Format format) const3768 Format Surface::selectInternalFormat(Format format) const 3769 { 3770 switch(format) 3771 { 3772 case FORMAT_NULL: 3773 return FORMAT_NULL; 3774 case FORMAT_P8: 3775 case FORMAT_A8P8: 3776 case FORMAT_A4R4G4B4: 3777 case FORMAT_A1R5G5B5: 3778 case FORMAT_A8R3G3B2: 3779 return FORMAT_A8R8G8B8; 3780 case FORMAT_A8: 3781 return FORMAT_A8; 3782 case FORMAT_R8I: 3783 return FORMAT_R8I; 3784 case FORMAT_R8UI: 3785 return FORMAT_R8UI; 3786 case FORMAT_R8_SNORM: 3787 return FORMAT_R8_SNORM; 3788 case FORMAT_R8: 3789 return FORMAT_R8; 3790 case FORMAT_R16I: 3791 return FORMAT_R16I; 3792 case FORMAT_R16UI: 3793 return FORMAT_R16UI; 3794 case FORMAT_R32I: 3795 return FORMAT_R32I; 3796 case FORMAT_R32UI: 3797 return FORMAT_R32UI; 3798 case FORMAT_X16B16G16R16I: 3799 return FORMAT_X16B16G16R16I; 3800 case FORMAT_A16B16G16R16I: 3801 return FORMAT_A16B16G16R16I; 3802 case FORMAT_X16B16G16R16UI: 3803 return FORMAT_X16B16G16R16UI; 3804 case FORMAT_A16B16G16R16UI: 3805 return FORMAT_A16B16G16R16UI; 3806 case FORMAT_A2R10G10B10: 3807 case FORMAT_A2B10G10R10: 3808 case FORMAT_A16B16G16R16: 3809 return FORMAT_A16B16G16R16; 3810 case FORMAT_A2B10G10R10UI: 3811 return FORMAT_A16B16G16R16UI; 3812 case FORMAT_X32B32G32R32I: 3813 return FORMAT_X32B32G32R32I; 3814 case FORMAT_A32B32G32R32I: 3815 return FORMAT_A32B32G32R32I; 3816 case FORMAT_X32B32G32R32UI: 3817 return FORMAT_X32B32G32R32UI; 3818 case FORMAT_A32B32G32R32UI: 3819 return FORMAT_A32B32G32R32UI; 3820 case FORMAT_G8R8I: 3821 return FORMAT_G8R8I; 3822 case FORMAT_G8R8UI: 3823 return FORMAT_G8R8UI; 3824 case FORMAT_G8R8_SNORM: 3825 return FORMAT_G8R8_SNORM; 3826 case FORMAT_G8R8: 3827 return FORMAT_G8R8; 3828 case FORMAT_G16R16I: 3829 
return FORMAT_G16R16I; 3830 case FORMAT_G16R16UI: 3831 return FORMAT_G16R16UI; 3832 case FORMAT_G16R16: 3833 return FORMAT_G16R16; 3834 case FORMAT_G32R32I: 3835 return FORMAT_G32R32I; 3836 case FORMAT_G32R32UI: 3837 return FORMAT_G32R32UI; 3838 case FORMAT_A8R8G8B8: 3839 if(lockable || !quadLayoutEnabled) 3840 { 3841 return FORMAT_A8R8G8B8; 3842 } 3843 else 3844 { 3845 return FORMAT_A8G8R8B8Q; 3846 } 3847 case FORMAT_A8B8G8R8I: 3848 return FORMAT_A8B8G8R8I; 3849 case FORMAT_A8B8G8R8UI: 3850 return FORMAT_A8B8G8R8UI; 3851 case FORMAT_A8B8G8R8_SNORM: 3852 return FORMAT_A8B8G8R8_SNORM; 3853 case FORMAT_R5G5B5A1: 3854 case FORMAT_R4G4B4A4: 3855 case FORMAT_A8B8G8R8: 3856 return FORMAT_A8B8G8R8; 3857 case FORMAT_R5G6B5: 3858 return FORMAT_R5G6B5; 3859 case FORMAT_R3G3B2: 3860 case FORMAT_R8G8B8: 3861 case FORMAT_X4R4G4B4: 3862 case FORMAT_X1R5G5B5: 3863 case FORMAT_X8R8G8B8: 3864 if(lockable || !quadLayoutEnabled) 3865 { 3866 return FORMAT_X8R8G8B8; 3867 } 3868 else 3869 { 3870 return FORMAT_X8G8R8B8Q; 3871 } 3872 case FORMAT_X8B8G8R8I: 3873 return FORMAT_X8B8G8R8I; 3874 case FORMAT_X8B8G8R8UI: 3875 return FORMAT_X8B8G8R8UI; 3876 case FORMAT_X8B8G8R8_SNORM: 3877 return FORMAT_X8B8G8R8_SNORM; 3878 case FORMAT_B8G8R8: 3879 case FORMAT_X8B8G8R8: 3880 return FORMAT_X8B8G8R8; 3881 case FORMAT_SRGB8_X8: 3882 return FORMAT_SRGB8_X8; 3883 case FORMAT_SRGB8_A8: 3884 return FORMAT_SRGB8_A8; 3885 // Compressed formats 3886 case FORMAT_DXT1: 3887 case FORMAT_DXT3: 3888 case FORMAT_DXT5: 3889 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3890 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3891 case FORMAT_RGBA8_ETC2_EAC: 3892 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3893 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3894 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3895 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3896 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3897 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3898 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3899 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3900 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3901 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3902 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3903 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3904 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3905 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3906 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3907 return FORMAT_A8R8G8B8; 3908 case FORMAT_RGBA_ASTC_4x4_KHR: 3909 case FORMAT_RGBA_ASTC_5x4_KHR: 3910 case FORMAT_RGBA_ASTC_5x5_KHR: 3911 case FORMAT_RGBA_ASTC_6x5_KHR: 3912 case FORMAT_RGBA_ASTC_6x6_KHR: 3913 case FORMAT_RGBA_ASTC_8x5_KHR: 3914 case FORMAT_RGBA_ASTC_8x6_KHR: 3915 case FORMAT_RGBA_ASTC_8x8_KHR: 3916 case FORMAT_RGBA_ASTC_10x5_KHR: 3917 case FORMAT_RGBA_ASTC_10x6_KHR: 3918 case FORMAT_RGBA_ASTC_10x8_KHR: 3919 case FORMAT_RGBA_ASTC_10x10_KHR: 3920 case FORMAT_RGBA_ASTC_12x10_KHR: 3921 case FORMAT_RGBA_ASTC_12x12_KHR: 3922 // ASTC supports HDR, so a floating point format is required to represent it properly 3923 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported 3924 case FORMAT_ATI1: 3925 return FORMAT_R8; 3926 case FORMAT_R11_EAC: 3927 case FORMAT_SIGNED_R11_EAC: 3928 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient 3929 case FORMAT_ATI2: 3930 return FORMAT_G8R8; 3931 case FORMAT_RG11_EAC: 3932 case FORMAT_SIGNED_RG11_EAC: 3933 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient 3934 case FORMAT_ETC1: 3935 case FORMAT_RGB8_ETC2: 3936 case FORMAT_SRGB8_ETC2: 3937 return FORMAT_X8R8G8B8; 3938 // Bumpmap formats 3939 case FORMAT_V8U8: 
return FORMAT_V8U8; 3940 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8; 3941 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8; 3942 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8; 3943 case FORMAT_V16U16: return FORMAT_V16U16; 3944 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16; 3945 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16; 3946 // Floating-point formats 3947 case FORMAT_A16F: return FORMAT_A32B32G32R32F; 3948 case FORMAT_R16F: return FORMAT_R32F; 3949 case FORMAT_G16R16F: return FORMAT_G32R32F; 3950 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F; 3951 case FORMAT_X16B16G16R16F: return FORMAT_X32B32G32R32F; 3952 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; 3953 case FORMAT_X16B16G16R16F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED; 3954 case FORMAT_A32F: return FORMAT_A32B32G32R32F; 3955 case FORMAT_R32F: return FORMAT_R32F; 3956 case FORMAT_G32R32F: return FORMAT_G32R32F; 3957 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F; 3958 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F; 3959 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F; 3960 case FORMAT_X32B32G32R32F_UNSIGNED: return FORMAT_X32B32G32R32F_UNSIGNED; 3961 // Luminance formats 3962 case FORMAT_L8: return FORMAT_L8; 3963 case FORMAT_A4L4: return FORMAT_A8L8; 3964 case FORMAT_L16: return FORMAT_L16; 3965 case FORMAT_A8L8: return FORMAT_A8L8; 3966 case FORMAT_L16F: return FORMAT_X32B32G32R32F; 3967 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F; 3968 case FORMAT_L32F: return FORMAT_X32B32G32R32F; 3969 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F; 3970 // Depth/stencil formats 3971 case FORMAT_D16: 3972 case FORMAT_D32: 3973 case FORMAT_D24X8: 3974 if(hasParent) // Texture 3975 { 3976 return FORMAT_D32F_SHADOW; 3977 } 3978 else if(complementaryDepthBuffer) 3979 { 3980 return FORMAT_D32F_COMPLEMENTARY; 3981 } 3982 else 3983 { 3984 return FORMAT_D32F; 3985 } 3986 case FORMAT_D24S8: 3987 case FORMAT_D24FS8: 3988 if(hasParent) // Texture 3989 { 3990 return FORMAT_D32FS8_SHADOW; 3991 } 3992 else if(complementaryDepthBuffer) 3993 { 3994 return FORMAT_D32FS8_COMPLEMENTARY; 3995 } 3996 else 3997 { 3998 return FORMAT_D32FS8; 3999 } 4000 case FORMAT_D32F: return FORMAT_D32F; 4001 case FORMAT_D32FS8: return FORMAT_D32FS8; 4002 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE; 4003 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE; 4004 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; 4005 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; 4006 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; 4007 case FORMAT_S8: return FORMAT_S8; 4008 // YUV formats 4009 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; 4010 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; 4011 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; 4012 default: 4013 ASSERT(false); 4014 } 4015 4016 return FORMAT_NULL; 4017 } 4018 setTexturePalette(unsigned int * palette)4019 void Surface::setTexturePalette(unsigned int *palette) 4020 { 4021 Surface::palette = palette; 4022 Surface::paletteID++; 4023 } 4024 resolve()4025 void Surface::resolve() 4026 { 4027 if(internal.samples <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL) 4028 { 4029 return; 4030 } 4031 4032 ASSERT(internal.depth == 1); // Unimplemented 4033 4034 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE); 4035 4036 int width = internal.width; 4037 int height = internal.height; 4038 int pitch = internal.pitchB; 4039 int slice = internal.sliceB; 4040 4041 unsigned char *source0 = (unsigned char*)source; 4042 unsigned char *source1 = 
source0 + slice; 4043 unsigned char *source2 = source1 + slice; 4044 unsigned char *source3 = source2 + slice; 4045 unsigned char *source4 = source3 + slice; 4046 unsigned char *source5 = source4 + slice; 4047 unsigned char *source6 = source5 + slice; 4048 unsigned char *source7 = source6 + slice; 4049 unsigned char *source8 = source7 + slice; 4050 unsigned char *source9 = source8 + slice; 4051 unsigned char *sourceA = source9 + slice; 4052 unsigned char *sourceB = sourceA + slice; 4053 unsigned char *sourceC = sourceB + slice; 4054 unsigned char *sourceD = sourceC + slice; 4055 unsigned char *sourceE = sourceD + slice; 4056 unsigned char *sourceF = sourceE + slice; 4057 4058 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || 4059 internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8 || 4060 internal.format == FORMAT_SRGB8_X8 || internal.format == FORMAT_SRGB8_A8) 4061 { 4062 #if defined(__i386__) || defined(__x86_64__) 4063 if(CPUID::supportsSSE2() && (width % 4) == 0) 4064 { 4065 if(internal.samples == 2) 4066 { 4067 for(int y = 0; y < height; y++) 4068 { 4069 for(int x = 0; x < width; x += 4) 4070 { 4071 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4072 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4073 4074 c0 = _mm_avg_epu8(c0, c1); 4075 4076 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4077 } 4078 4079 source0 += pitch; 4080 source1 += pitch; 4081 } 4082 } 4083 else if(internal.samples == 4) 4084 { 4085 for(int y = 0; y < height; y++) 4086 { 4087 for(int x = 0; x < width; x += 4) 4088 { 4089 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4090 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4091 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4092 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4093 4094 c0 = _mm_avg_epu8(c0, c1); 4095 c2 = _mm_avg_epu8(c2, c3); 4096 c0 = _mm_avg_epu8(c0, c2); 4097 4098 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4099 } 4100 4101 source0 += pitch; 4102 source1 += pitch; 4103 source2 += pitch; 4104 source3 += pitch; 4105 } 4106 } 4107 else if(internal.samples == 8) 4108 { 4109 for(int y = 0; y < height; y++) 4110 { 4111 for(int x = 0; x < width; x += 4) 4112 { 4113 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4114 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4115 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4116 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4117 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4118 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4119 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4120 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4121 4122 c0 = _mm_avg_epu8(c0, c1); 4123 c2 = _mm_avg_epu8(c2, c3); 4124 c4 = _mm_avg_epu8(c4, c5); 4125 c6 = _mm_avg_epu8(c6, c7); 4126 c0 = _mm_avg_epu8(c0, c2); 4127 c4 = _mm_avg_epu8(c4, c6); 4128 c0 = _mm_avg_epu8(c0, c4); 4129 4130 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4131 } 4132 4133 source0 += pitch; 4134 source1 += pitch; 4135 source2 += pitch; 4136 source3 += pitch; 4137 source4 += pitch; 4138 source5 += pitch; 4139 source6 += pitch; 4140 source7 += pitch; 4141 } 4142 } 4143 else if(internal.samples == 16) 4144 { 4145 for(int y = 0; y < height; y++) 4146 { 4147 for(int x = 0; x < width; x += 4) 4148 { 4149 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4150 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4151 __m128i c2 = 
_mm_load_si128((__m128i*)(source2 + 4 * x)); 4152 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4153 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4154 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4155 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4156 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4157 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4158 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4159 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4160 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4161 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4162 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4163 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4164 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4165 4166 c0 = _mm_avg_epu8(c0, c1); 4167 c2 = _mm_avg_epu8(c2, c3); 4168 c4 = _mm_avg_epu8(c4, c5); 4169 c6 = _mm_avg_epu8(c6, c7); 4170 c8 = _mm_avg_epu8(c8, c9); 4171 cA = _mm_avg_epu8(cA, cB); 4172 cC = _mm_avg_epu8(cC, cD); 4173 cE = _mm_avg_epu8(cE, cF); 4174 c0 = _mm_avg_epu8(c0, c2); 4175 c4 = _mm_avg_epu8(c4, c6); 4176 c8 = _mm_avg_epu8(c8, cA); 4177 cC = _mm_avg_epu8(cC, cE); 4178 c0 = _mm_avg_epu8(c0, c4); 4179 c8 = _mm_avg_epu8(c8, cC); 4180 c0 = _mm_avg_epu8(c0, c8); 4181 4182 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4183 } 4184 4185 source0 += pitch; 4186 source1 += pitch; 4187 source2 += pitch; 4188 source3 += pitch; 4189 source4 += pitch; 4190 source5 += pitch; 4191 source6 += pitch; 4192 source7 += pitch; 4193 source8 += pitch; 4194 source9 += pitch; 4195 sourceA += pitch; 4196 sourceB += pitch; 4197 sourceC += pitch; 4198 sourceD += pitch; 4199 sourceE += pitch; 4200 sourceF += pitch; 4201 } 4202 } 4203 else ASSERT(false); 4204 } 4205 else 4206 #endif 4207 { 4208 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 4209 4210 if(internal.samples == 2) 4211 { 4212 for(int y = 0; y < height; y++) 4213 { 4214 for(int x = 0; x < width; x++) 4215 { 4216 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4217 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4218 4219 c0 = AVERAGE(c0, c1); 4220 4221 *(unsigned int*)(source0 + 4 * x) = c0; 4222 } 4223 4224 source0 += pitch; 4225 source1 += pitch; 4226 } 4227 } 4228 else if(internal.samples == 4) 4229 { 4230 for(int y = 0; y < height; y++) 4231 { 4232 for(int x = 0; x < width; x++) 4233 { 4234 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4235 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4236 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4237 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4238 4239 c0 = AVERAGE(c0, c1); 4240 c2 = AVERAGE(c2, c3); 4241 c0 = AVERAGE(c0, c2); 4242 4243 *(unsigned int*)(source0 + 4 * x) = c0; 4244 } 4245 4246 source0 += pitch; 4247 source1 += pitch; 4248 source2 += pitch; 4249 source3 += pitch; 4250 } 4251 } 4252 else if(internal.samples == 8) 4253 { 4254 for(int y = 0; y < height; y++) 4255 { 4256 for(int x = 0; x < width; x++) 4257 { 4258 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4259 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4260 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4261 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4262 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4263 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4264 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4265 unsigned int c7 = 
*(unsigned int*)(source7 + 4 * x); 4266 4267 c0 = AVERAGE(c0, c1); 4268 c2 = AVERAGE(c2, c3); 4269 c4 = AVERAGE(c4, c5); 4270 c6 = AVERAGE(c6, c7); 4271 c0 = AVERAGE(c0, c2); 4272 c4 = AVERAGE(c4, c6); 4273 c0 = AVERAGE(c0, c4); 4274 4275 *(unsigned int*)(source0 + 4 * x) = c0; 4276 } 4277 4278 source0 += pitch; 4279 source1 += pitch; 4280 source2 += pitch; 4281 source3 += pitch; 4282 source4 += pitch; 4283 source5 += pitch; 4284 source6 += pitch; 4285 source7 += pitch; 4286 } 4287 } 4288 else if(internal.samples == 16) 4289 { 4290 for(int y = 0; y < height; y++) 4291 { 4292 for(int x = 0; x < width; x++) 4293 { 4294 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4295 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4296 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4297 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4298 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4299 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4300 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4301 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4302 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4303 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4304 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4305 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4306 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4307 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4308 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4309 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4310 4311 c0 = AVERAGE(c0, c1); 4312 c2 = AVERAGE(c2, c3); 4313 c4 = AVERAGE(c4, c5); 4314 c6 = AVERAGE(c6, c7); 4315 c8 = AVERAGE(c8, c9); 4316 cA = AVERAGE(cA, cB); 4317 cC = AVERAGE(cC, cD); 4318 cE = AVERAGE(cE, cF); 4319 c0 = AVERAGE(c0, c2); 4320 c4 = AVERAGE(c4, c6); 4321 c8 = AVERAGE(c8, cA); 4322 cC = AVERAGE(cC, cE); 4323 c0 = AVERAGE(c0, c4); 4324 c8 = AVERAGE(c8, cC); 4325 c0 = AVERAGE(c0, c8); 4326 4327 *(unsigned int*)(source0 + 4 * x) = c0; 4328 } 4329 4330 source0 += pitch; 4331 source1 += pitch; 4332 source2 += pitch; 4333 source3 += pitch; 4334 source4 += pitch; 4335 source5 += pitch; 4336 source6 += pitch; 4337 source7 += pitch; 4338 source8 += pitch; 4339 source9 += pitch; 4340 sourceA += pitch; 4341 sourceB += pitch; 4342 sourceC += pitch; 4343 sourceD += pitch; 4344 sourceE += pitch; 4345 sourceF += pitch; 4346 } 4347 } 4348 else ASSERT(false); 4349 4350 #undef AVERAGE 4351 } 4352 } 4353 else if(internal.format == FORMAT_G16R16) 4354 { 4355 4356 #if defined(__i386__) || defined(__x86_64__) 4357 if(CPUID::supportsSSE2() && (width % 4) == 0) 4358 { 4359 if(internal.samples == 2) 4360 { 4361 for(int y = 0; y < height; y++) 4362 { 4363 for(int x = 0; x < width; x += 4) 4364 { 4365 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4366 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4367 4368 c0 = _mm_avg_epu16(c0, c1); 4369 4370 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4371 } 4372 4373 source0 += pitch; 4374 source1 += pitch; 4375 } 4376 } 4377 else if(internal.samples == 4) 4378 { 4379 for(int y = 0; y < height; y++) 4380 { 4381 for(int x = 0; x < width; x += 4) 4382 { 4383 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4384 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4385 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4386 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4387 4388 c0 = _mm_avg_epu16(c0, c1); 4389 c2 = _mm_avg_epu16(c2, c3); 4390 c0 = _mm_avg_epu16(c0, c2); 4391 
4392 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4393 } 4394 4395 source0 += pitch; 4396 source1 += pitch; 4397 source2 += pitch; 4398 source3 += pitch; 4399 } 4400 } 4401 else if(internal.samples == 8) 4402 { 4403 for(int y = 0; y < height; y++) 4404 { 4405 for(int x = 0; x < width; x += 4) 4406 { 4407 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4408 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4409 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4410 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4411 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4412 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4413 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4414 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4415 4416 c0 = _mm_avg_epu16(c0, c1); 4417 c2 = _mm_avg_epu16(c2, c3); 4418 c4 = _mm_avg_epu16(c4, c5); 4419 c6 = _mm_avg_epu16(c6, c7); 4420 c0 = _mm_avg_epu16(c0, c2); 4421 c4 = _mm_avg_epu16(c4, c6); 4422 c0 = _mm_avg_epu16(c0, c4); 4423 4424 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4425 } 4426 4427 source0 += pitch; 4428 source1 += pitch; 4429 source2 += pitch; 4430 source3 += pitch; 4431 source4 += pitch; 4432 source5 += pitch; 4433 source6 += pitch; 4434 source7 += pitch; 4435 } 4436 } 4437 else if(internal.samples == 16) 4438 { 4439 for(int y = 0; y < height; y++) 4440 { 4441 for(int x = 0; x < width; x += 4) 4442 { 4443 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4444 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4445 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4446 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4447 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4448 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4449 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4450 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4451 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4452 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4453 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4454 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4455 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4456 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4457 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4458 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4459 4460 c0 = _mm_avg_epu16(c0, c1); 4461 c2 = _mm_avg_epu16(c2, c3); 4462 c4 = _mm_avg_epu16(c4, c5); 4463 c6 = _mm_avg_epu16(c6, c7); 4464 c8 = _mm_avg_epu16(c8, c9); 4465 cA = _mm_avg_epu16(cA, cB); 4466 cC = _mm_avg_epu16(cC, cD); 4467 cE = _mm_avg_epu16(cE, cF); 4468 c0 = _mm_avg_epu16(c0, c2); 4469 c4 = _mm_avg_epu16(c4, c6); 4470 c8 = _mm_avg_epu16(c8, cA); 4471 cC = _mm_avg_epu16(cC, cE); 4472 c0 = _mm_avg_epu16(c0, c4); 4473 c8 = _mm_avg_epu16(c8, cC); 4474 c0 = _mm_avg_epu16(c0, c8); 4475 4476 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4477 } 4478 4479 source0 += pitch; 4480 source1 += pitch; 4481 source2 += pitch; 4482 source3 += pitch; 4483 source4 += pitch; 4484 source5 += pitch; 4485 source6 += pitch; 4486 source7 += pitch; 4487 source8 += pitch; 4488 source9 += pitch; 4489 sourceA += pitch; 4490 sourceB += pitch; 4491 sourceC += pitch; 4492 sourceD += pitch; 4493 sourceE += pitch; 4494 sourceF += pitch; 4495 } 4496 } 4497 else ASSERT(false); 4498 } 4499 else 4500 #endif 4501 { 4502 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + 
(((x) ^ (y)) & 0x00010001)) 4503 4504 if(internal.samples == 2) 4505 { 4506 for(int y = 0; y < height; y++) 4507 { 4508 for(int x = 0; x < width; x++) 4509 { 4510 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4511 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4512 4513 c0 = AVERAGE(c0, c1); 4514 4515 *(unsigned int*)(source0 + 4 * x) = c0; 4516 } 4517 4518 source0 += pitch; 4519 source1 += pitch; 4520 } 4521 } 4522 else if(internal.samples == 4) 4523 { 4524 for(int y = 0; y < height; y++) 4525 { 4526 for(int x = 0; x < width; x++) 4527 { 4528 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4529 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4530 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4531 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4532 4533 c0 = AVERAGE(c0, c1); 4534 c2 = AVERAGE(c2, c3); 4535 c0 = AVERAGE(c0, c2); 4536 4537 *(unsigned int*)(source0 + 4 * x) = c0; 4538 } 4539 4540 source0 += pitch; 4541 source1 += pitch; 4542 source2 += pitch; 4543 source3 += pitch; 4544 } 4545 } 4546 else if(internal.samples == 8) 4547 { 4548 for(int y = 0; y < height; y++) 4549 { 4550 for(int x = 0; x < width; x++) 4551 { 4552 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4553 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4554 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4555 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4556 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4557 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4558 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4559 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4560 4561 c0 = AVERAGE(c0, c1); 4562 c2 = AVERAGE(c2, c3); 4563 c4 = AVERAGE(c4, c5); 4564 c6 = AVERAGE(c6, c7); 4565 c0 = AVERAGE(c0, c2); 4566 c4 = AVERAGE(c4, c6); 4567 c0 = AVERAGE(c0, c4); 4568 4569 *(unsigned int*)(source0 + 4 * x) = c0; 4570 } 4571 4572 source0 += pitch; 4573 source1 += pitch; 4574 source2 += pitch; 4575 source3 += pitch; 4576 source4 += pitch; 4577 source5 += pitch; 4578 source6 += pitch; 4579 source7 += pitch; 4580 } 4581 } 4582 else if(internal.samples == 16) 4583 { 4584 for(int y = 0; y < height; y++) 4585 { 4586 for(int x = 0; x < width; x++) 4587 { 4588 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4589 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4590 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4591 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4592 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4593 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4594 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4595 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4596 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4597 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4598 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4599 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4600 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4601 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4602 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4603 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4604 4605 c0 = AVERAGE(c0, c1); 4606 c2 = AVERAGE(c2, c3); 4607 c4 = AVERAGE(c4, c5); 4608 c6 = AVERAGE(c6, c7); 4609 c8 = AVERAGE(c8, c9); 4610 cA = AVERAGE(cA, cB); 4611 cC = AVERAGE(cC, cD); 4612 cE = AVERAGE(cE, cF); 4613 c0 = AVERAGE(c0, c2); 4614 c4 = AVERAGE(c4, c6); 4615 c8 = AVERAGE(c8, cA); 4616 cC = AVERAGE(cC, cE); 4617 c0 = AVERAGE(c0, c4); 4618 c8 = AVERAGE(c8, cC); 4619 c0 = 
AVERAGE(c0, c8); 4620 4621 *(unsigned int*)(source0 + 4 * x) = c0; 4622 } 4623 4624 source0 += pitch; 4625 source1 += pitch; 4626 source2 += pitch; 4627 source3 += pitch; 4628 source4 += pitch; 4629 source5 += pitch; 4630 source6 += pitch; 4631 source7 += pitch; 4632 source8 += pitch; 4633 source9 += pitch; 4634 sourceA += pitch; 4635 sourceB += pitch; 4636 sourceC += pitch; 4637 sourceD += pitch; 4638 sourceE += pitch; 4639 sourceF += pitch; 4640 } 4641 } 4642 else ASSERT(false); 4643 4644 #undef AVERAGE 4645 } 4646 } 4647 else if(internal.format == FORMAT_A16B16G16R16) 4648 { 4649 #if defined(__i386__) || defined(__x86_64__) 4650 if(CPUID::supportsSSE2() && (width % 2) == 0) 4651 { 4652 if(internal.samples == 2) 4653 { 4654 for(int y = 0; y < height; y++) 4655 { 4656 for(int x = 0; x < width; x += 2) 4657 { 4658 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4659 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4660 4661 c0 = _mm_avg_epu16(c0, c1); 4662 4663 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4664 } 4665 4666 source0 += pitch; 4667 source1 += pitch; 4668 } 4669 } 4670 else if(internal.samples == 4) 4671 { 4672 for(int y = 0; y < height; y++) 4673 { 4674 for(int x = 0; x < width; x += 2) 4675 { 4676 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4677 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4678 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4679 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4680 4681 c0 = _mm_avg_epu16(c0, c1); 4682 c2 = _mm_avg_epu16(c2, c3); 4683 c0 = _mm_avg_epu16(c0, c2); 4684 4685 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4686 } 4687 4688 source0 += pitch; 4689 source1 += pitch; 4690 source2 += pitch; 4691 source3 += pitch; 4692 } 4693 } 4694 else if(internal.samples == 8) 4695 { 4696 for(int y = 0; y < height; y++) 4697 { 4698 for(int x = 0; x < width; x += 2) 4699 { 4700 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4701 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4702 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4703 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4704 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4705 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4706 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4707 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4708 4709 c0 = _mm_avg_epu16(c0, c1); 4710 c2 = _mm_avg_epu16(c2, c3); 4711 c4 = _mm_avg_epu16(c4, c5); 4712 c6 = _mm_avg_epu16(c6, c7); 4713 c0 = _mm_avg_epu16(c0, c2); 4714 c4 = _mm_avg_epu16(c4, c6); 4715 c0 = _mm_avg_epu16(c0, c4); 4716 4717 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4718 } 4719 4720 source0 += pitch; 4721 source1 += pitch; 4722 source2 += pitch; 4723 source3 += pitch; 4724 source4 += pitch; 4725 source5 += pitch; 4726 source6 += pitch; 4727 source7 += pitch; 4728 } 4729 } 4730 else if(internal.samples == 16) 4731 { 4732 for(int y = 0; y < height; y++) 4733 { 4734 for(int x = 0; x < width; x += 2) 4735 { 4736 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4737 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4738 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4739 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4740 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4741 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4742 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4743 __m128i c7 = _mm_load_si128((__m128i*)(source7 
+ 8 * x)); 4744 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4745 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4746 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4747 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4748 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4749 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4750 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4751 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4752 4753 c0 = _mm_avg_epu16(c0, c1); 4754 c2 = _mm_avg_epu16(c2, c3); 4755 c4 = _mm_avg_epu16(c4, c5); 4756 c6 = _mm_avg_epu16(c6, c7); 4757 c8 = _mm_avg_epu16(c8, c9); 4758 cA = _mm_avg_epu16(cA, cB); 4759 cC = _mm_avg_epu16(cC, cD); 4760 cE = _mm_avg_epu16(cE, cF); 4761 c0 = _mm_avg_epu16(c0, c2); 4762 c4 = _mm_avg_epu16(c4, c6); 4763 c8 = _mm_avg_epu16(c8, cA); 4764 cC = _mm_avg_epu16(cC, cE); 4765 c0 = _mm_avg_epu16(c0, c4); 4766 c8 = _mm_avg_epu16(c8, cC); 4767 c0 = _mm_avg_epu16(c0, c8); 4768 4769 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4770 } 4771 4772 source0 += pitch; 4773 source1 += pitch; 4774 source2 += pitch; 4775 source3 += pitch; 4776 source4 += pitch; 4777 source5 += pitch; 4778 source6 += pitch; 4779 source7 += pitch; 4780 source8 += pitch; 4781 source9 += pitch; 4782 sourceA += pitch; 4783 sourceB += pitch; 4784 sourceC += pitch; 4785 sourceD += pitch; 4786 sourceE += pitch; 4787 sourceF += pitch; 4788 } 4789 } 4790 else ASSERT(false); 4791 } 4792 else 4793 #endif 4794 { 4795 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4796 4797 if(internal.samples == 2) 4798 { 4799 for(int y = 0; y < height; y++) 4800 { 4801 for(int x = 0; x < 2 * width; x++) 4802 { 4803 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4804 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4805 4806 c0 = AVERAGE(c0, c1); 4807 4808 *(unsigned int*)(source0 + 4 * x) = c0; 4809 } 4810 4811 source0 += pitch; 4812 source1 += pitch; 4813 } 4814 } 4815 else if(internal.samples == 4) 4816 { 4817 for(int y = 0; y < height; y++) 4818 { 4819 for(int x = 0; x < 2 * width; x++) 4820 { 4821 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4822 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4823 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4824 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4825 4826 c0 = AVERAGE(c0, c1); 4827 c2 = AVERAGE(c2, c3); 4828 c0 = AVERAGE(c0, c2); 4829 4830 *(unsigned int*)(source0 + 4 * x) = c0; 4831 } 4832 4833 source0 += pitch; 4834 source1 += pitch; 4835 source2 += pitch; 4836 source3 += pitch; 4837 } 4838 } 4839 else if(internal.samples == 8) 4840 { 4841 for(int y = 0; y < height; y++) 4842 { 4843 for(int x = 0; x < 2 * width; x++) 4844 { 4845 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4846 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4847 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4848 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4849 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4850 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4851 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4852 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4853 4854 c0 = AVERAGE(c0, c1); 4855 c2 = AVERAGE(c2, c3); 4856 c4 = AVERAGE(c4, c5); 4857 c6 = AVERAGE(c6, c7); 4858 c0 = AVERAGE(c0, c2); 4859 c4 = AVERAGE(c4, c6); 4860 c0 = AVERAGE(c0, c4); 4861 4862 *(unsigned int*)(source0 + 4 * x) = c0; 4863 } 4864 4865 source0 += pitch; 4866 source1 += 
pitch; 4867 source2 += pitch; 4868 source3 += pitch; 4869 source4 += pitch; 4870 source5 += pitch; 4871 source6 += pitch; 4872 source7 += pitch; 4873 } 4874 } 4875 else if(internal.samples == 16) 4876 { 4877 for(int y = 0; y < height; y++) 4878 { 4879 for(int x = 0; x < 2 * width; x++) 4880 { 4881 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4882 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4883 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4884 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4885 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4886 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4887 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4888 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4889 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4890 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4891 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4892 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4893 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4894 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4895 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4896 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4897 4898 c0 = AVERAGE(c0, c1); 4899 c2 = AVERAGE(c2, c3); 4900 c4 = AVERAGE(c4, c5); 4901 c6 = AVERAGE(c6, c7); 4902 c8 = AVERAGE(c8, c9); 4903 cA = AVERAGE(cA, cB); 4904 cC = AVERAGE(cC, cD); 4905 cE = AVERAGE(cE, cF); 4906 c0 = AVERAGE(c0, c2); 4907 c4 = AVERAGE(c4, c6); 4908 c8 = AVERAGE(c8, cA); 4909 cC = AVERAGE(cC, cE); 4910 c0 = AVERAGE(c0, c4); 4911 c8 = AVERAGE(c8, cC); 4912 c0 = AVERAGE(c0, c8); 4913 4914 *(unsigned int*)(source0 + 4 * x) = c0; 4915 } 4916 4917 source0 += pitch; 4918 source1 += pitch; 4919 source2 += pitch; 4920 source3 += pitch; 4921 source4 += pitch; 4922 source5 += pitch; 4923 source6 += pitch; 4924 source7 += pitch; 4925 source8 += pitch; 4926 source9 += pitch; 4927 sourceA += pitch; 4928 sourceB += pitch; 4929 sourceC += pitch; 4930 sourceD += pitch; 4931 sourceE += pitch; 4932 sourceF += pitch; 4933 } 4934 } 4935 else ASSERT(false); 4936 4937 #undef AVERAGE 4938 } 4939 } 4940 else if(internal.format == FORMAT_R32F) 4941 { 4942 #if defined(__i386__) || defined(__x86_64__) 4943 if(CPUID::supportsSSE() && (width % 4) == 0) 4944 { 4945 if(internal.samples == 2) 4946 { 4947 for(int y = 0; y < height; y++) 4948 { 4949 for(int x = 0; x < width; x += 4) 4950 { 4951 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4952 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4953 4954 c0 = _mm_add_ps(c0, c1); 4955 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4956 4957 _mm_store_ps((float*)(source0 + 4 * x), c0); 4958 } 4959 4960 source0 += pitch; 4961 source1 += pitch; 4962 } 4963 } 4964 else if(internal.samples == 4) 4965 { 4966 for(int y = 0; y < height; y++) 4967 { 4968 for(int x = 0; x < width; x += 4) 4969 { 4970 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4971 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4972 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4973 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4974 4975 c0 = _mm_add_ps(c0, c1); 4976 c2 = _mm_add_ps(c2, c3); 4977 c0 = _mm_add_ps(c0, c2); 4978 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4979 4980 _mm_store_ps((float*)(source0 + 4 * x), c0); 4981 } 4982 4983 source0 += pitch; 4984 source1 += pitch; 4985 source2 += pitch; 4986 source3 += pitch; 4987 } 4988 } 4989 else if(internal.samples == 8) 4990 { 4991 for(int y = 0; y < height; y++) 4992 { 4993 for(int x = 0; x < width; 
x += 4) 4994 { 4995 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4996 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4997 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4998 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4999 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 5000 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 5001 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 5002 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 5003 5004 c0 = _mm_add_ps(c0, c1); 5005 c2 = _mm_add_ps(c2, c3); 5006 c4 = _mm_add_ps(c4, c5); 5007 c6 = _mm_add_ps(c6, c7); 5008 c0 = _mm_add_ps(c0, c2); 5009 c4 = _mm_add_ps(c4, c6); 5010 c0 = _mm_add_ps(c0, c4); 5011 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5012 5013 _mm_store_ps((float*)(source0 + 4 * x), c0); 5014 } 5015 5016 source0 += pitch; 5017 source1 += pitch; 5018 source2 += pitch; 5019 source3 += pitch; 5020 source4 += pitch; 5021 source5 += pitch; 5022 source6 += pitch; 5023 source7 += pitch; 5024 } 5025 } 5026 else if(internal.samples == 16) 5027 { 5028 for(int y = 0; y < height; y++) 5029 { 5030 for(int x = 0; x < width; x += 4) 5031 { 5032 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 5033 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 5034 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 5035 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 5036 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 5037 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 5038 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 5039 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 5040 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 5041 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 5042 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x)); 5043 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 5044 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 5045 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 5046 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 5047 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 5048 5049 c0 = _mm_add_ps(c0, c1); 5050 c2 = _mm_add_ps(c2, c3); 5051 c4 = _mm_add_ps(c4, c5); 5052 c6 = _mm_add_ps(c6, c7); 5053 c8 = _mm_add_ps(c8, c9); 5054 cA = _mm_add_ps(cA, cB); 5055 cC = _mm_add_ps(cC, cD); 5056 cE = _mm_add_ps(cE, cF); 5057 c0 = _mm_add_ps(c0, c2); 5058 c4 = _mm_add_ps(c4, c6); 5059 c8 = _mm_add_ps(c8, cA); 5060 cC = _mm_add_ps(cC, cE); 5061 c0 = _mm_add_ps(c0, c4); 5062 c8 = _mm_add_ps(c8, cC); 5063 c0 = _mm_add_ps(c0, c8); 5064 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5065 5066 _mm_store_ps((float*)(source0 + 4 * x), c0); 5067 } 5068 5069 source0 += pitch; 5070 source1 += pitch; 5071 source2 += pitch; 5072 source3 += pitch; 5073 source4 += pitch; 5074 source5 += pitch; 5075 source6 += pitch; 5076 source7 += pitch; 5077 source8 += pitch; 5078 source9 += pitch; 5079 sourceA += pitch; 5080 sourceB += pitch; 5081 sourceC += pitch; 5082 sourceD += pitch; 5083 sourceE += pitch; 5084 sourceF += pitch; 5085 } 5086 } 5087 else ASSERT(false); 5088 } 5089 else 5090 #endif 5091 { 5092 if(internal.samples == 2) 5093 { 5094 for(int y = 0; y < height; y++) 5095 { 5096 for(int x = 0; x < width; x++) 5097 { 5098 float c0 = *(float*)(source0 + 4 * x); 5099 float c1 = *(float*)(source1 + 4 * x); 5100 5101 c0 = c0 + c1; 5102 c0 *= 1.0f / 2.0f; 5103 5104 *(float*)(source0 + 4 * x) = c0; 5105 } 5106 5107 source0 += pitch; 5108 source1 += pitch; 5109 } 5110 } 5111 else if(internal.samples == 4) 5112 { 5113 for(int y = 0; y < height; y++) 5114 { 5115 for(int x = 0; x < 
width; x++) 5116 { 5117 float c0 = *(float*)(source0 + 4 * x); 5118 float c1 = *(float*)(source1 + 4 * x); 5119 float c2 = *(float*)(source2 + 4 * x); 5120 float c3 = *(float*)(source3 + 4 * x); 5121 5122 c0 = c0 + c1; 5123 c2 = c2 + c3; 5124 c0 = c0 + c2; 5125 c0 *= 1.0f / 4.0f; 5126 5127 *(float*)(source0 + 4 * x) = c0; 5128 } 5129 5130 source0 += pitch; 5131 source1 += pitch; 5132 source2 += pitch; 5133 source3 += pitch; 5134 } 5135 } 5136 else if(internal.samples == 8) 5137 { 5138 for(int y = 0; y < height; y++) 5139 { 5140 for(int x = 0; x < width; x++) 5141 { 5142 float c0 = *(float*)(source0 + 4 * x); 5143 float c1 = *(float*)(source1 + 4 * x); 5144 float c2 = *(float*)(source2 + 4 * x); 5145 float c3 = *(float*)(source3 + 4 * x); 5146 float c4 = *(float*)(source4 + 4 * x); 5147 float c5 = *(float*)(source5 + 4 * x); 5148 float c6 = *(float*)(source6 + 4 * x); 5149 float c7 = *(float*)(source7 + 4 * x); 5150 5151 c0 = c0 + c1; 5152 c2 = c2 + c3; 5153 c4 = c4 + c5; 5154 c6 = c6 + c7; 5155 c0 = c0 + c2; 5156 c4 = c4 + c6; 5157 c0 = c0 + c4; 5158 c0 *= 1.0f / 8.0f; 5159 5160 *(float*)(source0 + 4 * x) = c0; 5161 } 5162 5163 source0 += pitch; 5164 source1 += pitch; 5165 source2 += pitch; 5166 source3 += pitch; 5167 source4 += pitch; 5168 source5 += pitch; 5169 source6 += pitch; 5170 source7 += pitch; 5171 } 5172 } 5173 else if(internal.samples == 16) 5174 { 5175 for(int y = 0; y < height; y++) 5176 { 5177 for(int x = 0; x < width; x++) 5178 { 5179 float c0 = *(float*)(source0 + 4 * x); 5180 float c1 = *(float*)(source1 + 4 * x); 5181 float c2 = *(float*)(source2 + 4 * x); 5182 float c3 = *(float*)(source3 + 4 * x); 5183 float c4 = *(float*)(source4 + 4 * x); 5184 float c5 = *(float*)(source5 + 4 * x); 5185 float c6 = *(float*)(source6 + 4 * x); 5186 float c7 = *(float*)(source7 + 4 * x); 5187 float c8 = *(float*)(source8 + 4 * x); 5188 float c9 = *(float*)(source9 + 4 * x); 5189 float cA = *(float*)(sourceA + 4 * x); 5190 float cB = *(float*)(sourceB + 4 * x); 5191 float cC = *(float*)(sourceC + 4 * x); 5192 float cD = *(float*)(sourceD + 4 * x); 5193 float cE = *(float*)(sourceE + 4 * x); 5194 float cF = *(float*)(sourceF + 4 * x); 5195 5196 c0 = c0 + c1; 5197 c2 = c2 + c3; 5198 c4 = c4 + c5; 5199 c6 = c6 + c7; 5200 c8 = c8 + c9; 5201 cA = cA + cB; 5202 cC = cC + cD; 5203 cE = cE + cF; 5204 c0 = c0 + c2; 5205 c4 = c4 + c6; 5206 c8 = c8 + cA; 5207 cC = cC + cE; 5208 c0 = c0 + c4; 5209 c8 = c8 + cC; 5210 c0 = c0 + c8; 5211 c0 *= 1.0f / 16.0f; 5212 5213 *(float*)(source0 + 4 * x) = c0; 5214 } 5215 5216 source0 += pitch; 5217 source1 += pitch; 5218 source2 += pitch; 5219 source3 += pitch; 5220 source4 += pitch; 5221 source5 += pitch; 5222 source6 += pitch; 5223 source7 += pitch; 5224 source8 += pitch; 5225 source9 += pitch; 5226 sourceA += pitch; 5227 sourceB += pitch; 5228 sourceC += pitch; 5229 sourceD += pitch; 5230 sourceE += pitch; 5231 sourceF += pitch; 5232 } 5233 } 5234 else ASSERT(false); 5235 } 5236 } 5237 else if(internal.format == FORMAT_G32R32F) 5238 { 5239 #if defined(__i386__) || defined(__x86_64__) 5240 if(CPUID::supportsSSE() && (width % 2) == 0) 5241 { 5242 if(internal.samples == 2) 5243 { 5244 for(int y = 0; y < height; y++) 5245 { 5246 for(int x = 0; x < width; x += 2) 5247 { 5248 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5249 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5250 5251 c0 = _mm_add_ps(c0, c1); 5252 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5253 5254 _mm_store_ps((float*)(source0 + 8 * x), c0); 5255 } 5256 5257 source0 += pitch; 5258 
source1 += pitch; 5259 } 5260 } 5261 else if(internal.samples == 4) 5262 { 5263 for(int y = 0; y < height; y++) 5264 { 5265 for(int x = 0; x < width; x += 2) 5266 { 5267 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5268 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5269 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5270 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5271 5272 c0 = _mm_add_ps(c0, c1); 5273 c2 = _mm_add_ps(c2, c3); 5274 c0 = _mm_add_ps(c0, c2); 5275 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5276 5277 _mm_store_ps((float*)(source0 + 8 * x), c0); 5278 } 5279 5280 source0 += pitch; 5281 source1 += pitch; 5282 source2 += pitch; 5283 source3 += pitch; 5284 } 5285 } 5286 else if(internal.samples == 8) 5287 { 5288 for(int y = 0; y < height; y++) 5289 { 5290 for(int x = 0; x < width; x += 2) 5291 { 5292 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5293 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5294 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5295 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5296 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5297 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5298 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5299 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5300 5301 c0 = _mm_add_ps(c0, c1); 5302 c2 = _mm_add_ps(c2, c3); 5303 c4 = _mm_add_ps(c4, c5); 5304 c6 = _mm_add_ps(c6, c7); 5305 c0 = _mm_add_ps(c0, c2); 5306 c4 = _mm_add_ps(c4, c6); 5307 c0 = _mm_add_ps(c0, c4); 5308 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5309 5310 _mm_store_ps((float*)(source0 + 8 * x), c0); 5311 } 5312 5313 source0 += pitch; 5314 source1 += pitch; 5315 source2 += pitch; 5316 source3 += pitch; 5317 source4 += pitch; 5318 source5 += pitch; 5319 source6 += pitch; 5320 source7 += pitch; 5321 } 5322 } 5323 else if(internal.samples == 16) 5324 { 5325 for(int y = 0; y < height; y++) 5326 { 5327 for(int x = 0; x < width; x += 2) 5328 { 5329 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 5330 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 5331 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 5332 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 5333 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 5334 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 5335 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 5336 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 5337 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x)); 5338 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 5339 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 5340 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x)); 5341 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 5342 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 5343 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 5344 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 5345 5346 c0 = _mm_add_ps(c0, c1); 5347 c2 = _mm_add_ps(c2, c3); 5348 c4 = _mm_add_ps(c4, c5); 5349 c6 = _mm_add_ps(c6, c7); 5350 c8 = _mm_add_ps(c8, c9); 5351 cA = _mm_add_ps(cA, cB); 5352 cC = _mm_add_ps(cC, cD); 5353 cE = _mm_add_ps(cE, cF); 5354 c0 = _mm_add_ps(c0, c2); 5355 c4 = _mm_add_ps(c4, c6); 5356 c8 = _mm_add_ps(c8, cA); 5357 cC = _mm_add_ps(cC, cE); 5358 c0 = _mm_add_ps(c0, c4); 5359 c8 = _mm_add_ps(c8, cC); 5360 c0 = _mm_add_ps(c0, c8); 5361 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5362 5363 _mm_store_ps((float*)(source0 + 8 * x), c0); 5364 } 5365 5366 source0 += pitch; 5367 source1 += pitch; 5368 source2 += pitch; 5369 source3 += pitch; 5370 
source4 += pitch; 5371 source5 += pitch; 5372 source6 += pitch; 5373 source7 += pitch; 5374 source8 += pitch; 5375 source9 += pitch; 5376 sourceA += pitch; 5377 sourceB += pitch; 5378 sourceC += pitch; 5379 sourceD += pitch; 5380 sourceE += pitch; 5381 sourceF += pitch; 5382 } 5383 } 5384 else ASSERT(false); 5385 } 5386 else 5387 #endif 5388 { 5389 if(internal.samples == 2) 5390 { 5391 for(int y = 0; y < height; y++) 5392 { 5393 for(int x = 0; x < 2 * width; x++) 5394 { 5395 float c0 = *(float*)(source0 + 4 * x); 5396 float c1 = *(float*)(source1 + 4 * x); 5397 5398 c0 = c0 + c1; 5399 c0 *= 1.0f / 2.0f; 5400 5401 *(float*)(source0 + 4 * x) = c0; 5402 } 5403 5404 source0 += pitch; 5405 source1 += pitch; 5406 } 5407 } 5408 else if(internal.samples == 4) 5409 { 5410 for(int y = 0; y < height; y++) 5411 { 5412 for(int x = 0; x < 2 * width; x++) 5413 { 5414 float c0 = *(float*)(source0 + 4 * x); 5415 float c1 = *(float*)(source1 + 4 * x); 5416 float c2 = *(float*)(source2 + 4 * x); 5417 float c3 = *(float*)(source3 + 4 * x); 5418 5419 c0 = c0 + c1; 5420 c2 = c2 + c3; 5421 c0 = c0 + c2; 5422 c0 *= 1.0f / 4.0f; 5423 5424 *(float*)(source0 + 4 * x) = c0; 5425 } 5426 5427 source0 += pitch; 5428 source1 += pitch; 5429 source2 += pitch; 5430 source3 += pitch; 5431 } 5432 } 5433 else if(internal.samples == 8) 5434 { 5435 for(int y = 0; y < height; y++) 5436 { 5437 for(int x = 0; x < 2 * width; x++) 5438 { 5439 float c0 = *(float*)(source0 + 4 * x); 5440 float c1 = *(float*)(source1 + 4 * x); 5441 float c2 = *(float*)(source2 + 4 * x); 5442 float c3 = *(float*)(source3 + 4 * x); 5443 float c4 = *(float*)(source4 + 4 * x); 5444 float c5 = *(float*)(source5 + 4 * x); 5445 float c6 = *(float*)(source6 + 4 * x); 5446 float c7 = *(float*)(source7 + 4 * x); 5447 5448 c0 = c0 + c1; 5449 c2 = c2 + c3; 5450 c4 = c4 + c5; 5451 c6 = c6 + c7; 5452 c0 = c0 + c2; 5453 c4 = c4 + c6; 5454 c0 = c0 + c4; 5455 c0 *= 1.0f / 8.0f; 5456 5457 *(float*)(source0 + 4 * x) = c0; 5458 } 5459 5460 source0 += pitch; 5461 source1 += pitch; 5462 source2 += pitch; 5463 source3 += pitch; 5464 source4 += pitch; 5465 source5 += pitch; 5466 source6 += pitch; 5467 source7 += pitch; 5468 } 5469 } 5470 else if(internal.samples == 16) 5471 { 5472 for(int y = 0; y < height; y++) 5473 { 5474 for(int x = 0; x < 2 * width; x++) 5475 { 5476 float c0 = *(float*)(source0 + 4 * x); 5477 float c1 = *(float*)(source1 + 4 * x); 5478 float c2 = *(float*)(source2 + 4 * x); 5479 float c3 = *(float*)(source3 + 4 * x); 5480 float c4 = *(float*)(source4 + 4 * x); 5481 float c5 = *(float*)(source5 + 4 * x); 5482 float c6 = *(float*)(source6 + 4 * x); 5483 float c7 = *(float*)(source7 + 4 * x); 5484 float c8 = *(float*)(source8 + 4 * x); 5485 float c9 = *(float*)(source9 + 4 * x); 5486 float cA = *(float*)(sourceA + 4 * x); 5487 float cB = *(float*)(sourceB + 4 * x); 5488 float cC = *(float*)(sourceC + 4 * x); 5489 float cD = *(float*)(sourceD + 4 * x); 5490 float cE = *(float*)(sourceE + 4 * x); 5491 float cF = *(float*)(sourceF + 4 * x); 5492 5493 c0 = c0 + c1; 5494 c2 = c2 + c3; 5495 c4 = c4 + c5; 5496 c6 = c6 + c7; 5497 c8 = c8 + c9; 5498 cA = cA + cB; 5499 cC = cC + cD; 5500 cE = cE + cF; 5501 c0 = c0 + c2; 5502 c4 = c4 + c6; 5503 c8 = c8 + cA; 5504 cC = cC + cE; 5505 c0 = c0 + c4; 5506 c8 = c8 + cC; 5507 c0 = c0 + c8; 5508 c0 *= 1.0f / 16.0f; 5509 5510 *(float*)(source0 + 4 * x) = c0; 5511 } 5512 5513 source0 += pitch; 5514 source1 += pitch; 5515 source2 += pitch; 5516 source3 += pitch; 5517 source4 += pitch; 5518 source5 += pitch; 5519 source6 += pitch; 
5520 source7 += pitch; 5521 source8 += pitch; 5522 source9 += pitch; 5523 sourceA += pitch; 5524 sourceB += pitch; 5525 sourceC += pitch; 5526 sourceD += pitch; 5527 sourceE += pitch; 5528 sourceF += pitch; 5529 } 5530 } 5531 else ASSERT(false); 5532 } 5533 } 5534 else if(internal.format == FORMAT_A32B32G32R32F || 5535 internal.format == FORMAT_X32B32G32R32F || 5536 internal.format == FORMAT_X32B32G32R32F_UNSIGNED) 5537 { 5538 #if defined(__i386__) || defined(__x86_64__) 5539 if(CPUID::supportsSSE()) 5540 { 5541 if(internal.samples == 2) 5542 { 5543 for(int y = 0; y < height; y++) 5544 { 5545 for(int x = 0; x < width; x++) 5546 { 5547 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5548 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5549 5550 c0 = _mm_add_ps(c0, c1); 5551 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5552 5553 _mm_store_ps((float*)(source0 + 16 * x), c0); 5554 } 5555 5556 source0 += pitch; 5557 source1 += pitch; 5558 } 5559 } 5560 else if(internal.samples == 4) 5561 { 5562 for(int y = 0; y < height; y++) 5563 { 5564 for(int x = 0; x < width; x++) 5565 { 5566 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5567 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5568 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5569 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5570 5571 c0 = _mm_add_ps(c0, c1); 5572 c2 = _mm_add_ps(c2, c3); 5573 c0 = _mm_add_ps(c0, c2); 5574 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5575 5576 _mm_store_ps((float*)(source0 + 16 * x), c0); 5577 } 5578 5579 source0 += pitch; 5580 source1 += pitch; 5581 source2 += pitch; 5582 source3 += pitch; 5583 } 5584 } 5585 else if(internal.samples == 8) 5586 { 5587 for(int y = 0; y < height; y++) 5588 { 5589 for(int x = 0; x < width; x++) 5590 { 5591 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5592 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5593 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5594 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5595 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5596 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5597 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5598 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5599 5600 c0 = _mm_add_ps(c0, c1); 5601 c2 = _mm_add_ps(c2, c3); 5602 c4 = _mm_add_ps(c4, c5); 5603 c6 = _mm_add_ps(c6, c7); 5604 c0 = _mm_add_ps(c0, c2); 5605 c4 = _mm_add_ps(c4, c6); 5606 c0 = _mm_add_ps(c0, c4); 5607 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5608 5609 _mm_store_ps((float*)(source0 + 16 * x), c0); 5610 } 5611 5612 source0 += pitch; 5613 source1 += pitch; 5614 source2 += pitch; 5615 source3 += pitch; 5616 source4 += pitch; 5617 source5 += pitch; 5618 source6 += pitch; 5619 source7 += pitch; 5620 } 5621 } 5622 else if(internal.samples == 16) 5623 { 5624 for(int y = 0; y < height; y++) 5625 { 5626 for(int x = 0; x < width; x++) 5627 { 5628 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5629 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5630 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5631 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5632 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5633 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5634 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5635 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5636 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5637 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 5638 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5639 
								__m128 cB = _mm_load_ps((float*)(sourceB + 16 * x));
								__m128 cC = _mm_load_ps((float*)(sourceC + 16 * x));
								__m128 cD = _mm_load_ps((float*)(sourceD + 16 * x));
								__m128 cE = _mm_load_ps((float*)(sourceE + 16 * x));
								__m128 cF = _mm_load_ps((float*)(sourceF + 16 * x));

								c0 = _mm_add_ps(c0, c1);
								c2 = _mm_add_ps(c2, c3);
								c4 = _mm_add_ps(c4, c5);
								c6 = _mm_add_ps(c6, c7);
								c8 = _mm_add_ps(c8, c9);
								cA = _mm_add_ps(cA, cB);
								cC = _mm_add_ps(cC, cD);
								cE = _mm_add_ps(cE, cF);
								c0 = _mm_add_ps(c0, c2);
								c4 = _mm_add_ps(c4, c6);
								c8 = _mm_add_ps(c8, cA);
								cC = _mm_add_ps(cC, cE);
								c0 = _mm_add_ps(c0, c4);
								c8 = _mm_add_ps(c8, cC);
								c0 = _mm_add_ps(c0, c8);
								c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f));

								_mm_store_ps((float*)(source0 + 16 * x), c0);
							}

							source0 += pitch;
							source1 += pitch;
							source2 += pitch;
							source3 += pitch;
							source4 += pitch;
							source5 += pitch;
							source6 += pitch;
							source7 += pitch;
							source8 += pitch;
							source9 += pitch;
							sourceA += pitch;
							sourceB += pitch;
							sourceC += pitch;
							sourceD += pitch;
							sourceE += pitch;
							sourceF += pitch;
						}
					}
					else ASSERT(false);
				}
				else
			#endif
			{
				if(internal.samples == 2)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < 4 * width; x++)
						{
							float c0 = *(float*)(source0 + 4 * x);
							float c1 = *(float*)(source1 + 4 * x);

							c0 = c0 + c1;
							c0 *= 1.0f / 2.0f;

							*(float*)(source0 + 4 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
					}
				}
				else if(internal.samples == 4)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < 4 * width; x++)
						{
							float c0 = *(float*)(source0 + 4 * x);
							float c1 = *(float*)(source1 + 4 * x);
							float c2 = *(float*)(source2 + 4 * x);
							float c3 = *(float*)(source3 + 4 * x);

							c0 = c0 + c1;
							c2 = c2 + c3;
							c0 = c0 + c2;
							c0 *= 1.0f / 4.0f;

							*(float*)(source0 + 4 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
					}
				}
				else if(internal.samples == 8)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < 4 * width; x++)
						{
							float c0 = *(float*)(source0 + 4 * x);
							float c1 = *(float*)(source1 + 4 * x);
							float c2 = *(float*)(source2 + 4 * x);
							float c3 = *(float*)(source3 + 4 * x);
							float c4 = *(float*)(source4 + 4 * x);
							float c5 = *(float*)(source5 + 4 * x);
							float c6 = *(float*)(source6 + 4 * x);
							float c7 = *(float*)(source7 + 4 * x);

							c0 = c0 + c1;
							c2 = c2 + c3;
							c4 = c4 + c5;
							c6 = c6 + c7;
							c0 = c0 + c2;
							c4 = c4 + c6;
							c0 = c0 + c4;
							c0 *= 1.0f / 8.0f;

							*(float*)(source0 + 4 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
					}
				}
				else if(internal.samples == 16)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < 4 * width; x++)
						{
							float c0 = *(float*)(source0 + 4 * x);
							float c1 = *(float*)(source1 + 4 * x);
							float c2 = *(float*)(source2 + 4 * x);
							float c3 = *(float*)(source3 + 4 * x);
							float c4 = *(float*)(source4 + 4 * x);
							float c5 = *(float*)(source5 + 4 * x);
							float c6 = *(float*)(source6 + 4 * x);
							float c7 = *(float*)(source7 + 4 * x);
							float c8 = *(float*)(source8 + 4 * x);
							float c9 = *(float*)(source9 + 4 * x);
							float cA = *(float*)(sourceA + 4 * x);
							float cB = *(float*)(sourceB + 4 * x);
							float cC = *(float*)(sourceC + 4 * x);
							float cD = *(float*)(sourceD + 4 * x);
							float cE = *(float*)(sourceE + 4 * x);
							float cF = *(float*)(sourceF + 4 * x);

							c0 = c0 + c1;
							c2 = c2 + c3;
							c4 = c4 + c5;
							c6 = c6 + c7;
							c8 = c8 + c9;
							cA = cA + cB;
							cC = cC + cD;
							cE = cE + cF;
							c0 = c0 + c2;
							c4 = c4 + c6;
							c8 = c8 + cA;
							cC = cC + cE;
							c0 = c0 + c4;
							c8 = c8 + cC;
							c0 = c0 + c8;
							c0 *= 1.0f / 16.0f;

							*(float*)(source0 + 4 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
						source8 += pitch;
						source9 += pitch;
						sourceA += pitch;
						sourceB += pitch;
						sourceC += pitch;
						sourceD += pitch;
						sourceE += pitch;
						sourceF += pitch;
					}
				}
				else ASSERT(false);
			}
		}
		else if(internal.format == FORMAT_R5G6B5)
		{
			// R5G6B5 samples are averaged in place. The red/blue fields (mask 0xF81F) and
			// the green field (mask 0x07E0) are averaged separately so that the rounding
			// carry of one channel cannot spill into its neighbor.
			#if defined(__i386__) || defined(__x86_64__)
				if(CPUID::supportsSSE2() && (width % 8) == 0)
				{
					if(internal.samples == 2)
					{
						for(int y = 0; y < height; y++)
						{
							for(int x = 0; x < width; x += 8)
							{
								__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
								__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));

								static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
								static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
								__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
								__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
								__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
								__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));

								c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
								c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
								c1 = _mm_avg_epu16(c0__g_, c1__g_);
								c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
								c0 = _mm_or_si128(c0, c1);

								_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
							}

							source0 += pitch;
							source1 += pitch;
						}
					}
					else if(internal.samples == 4)
					{
						for(int y = 0; y < height; y++)
						{
							for(int x = 0; x < width; x += 8)
							{
								__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
								__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
								__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
								__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));

								static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
								static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
								__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
								__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
								__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
								__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
								__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
								__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
								__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
								__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));

								c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
								c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
								c0 = _mm_avg_epu8(c0, c2);
								c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
								c1 = _mm_avg_epu16(c0__g_, c1__g_);
								c3 = _mm_avg_epu16(c2__g_, c3__g_);
								c1 = _mm_avg_epu16(c1, c3);
								c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
								c0 = _mm_or_si128(c0, c1);

								_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
							}

							source0 += pitch;
							source1 += pitch;
							source2 += pitch;
							source3 += pitch;
						}
					}
					else if(internal.samples == 8)
					{
						for(int y = 0; y < height; y++)
						{
							for(int x = 0; x < width; x += 8)
							{
								__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
								__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
								__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
								__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
								__m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
								__m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
								__m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
								__m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));

								static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
								static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
								__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
								__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
								__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
								__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
								__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
								__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
								__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
								__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
								__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
								__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
								__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
								__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
								__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
								__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
								__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
								__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));

								c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
								c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
								c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
								c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
								c0 = _mm_avg_epu8(c0, c2);
								c4 = _mm_avg_epu8(c4, c6);
								c0 = _mm_avg_epu8(c0, c4);
								c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
								c1 = _mm_avg_epu16(c0__g_, c1__g_);
								c3 = _mm_avg_epu16(c2__g_, c3__g_);
								c5 = _mm_avg_epu16(c4__g_, c5__g_);
								c7 = _mm_avg_epu16(c6__g_, c7__g_);
								c1 = _mm_avg_epu16(c1, c3);
								c5 = _mm_avg_epu16(c5, c7);
								c1 = _mm_avg_epu16(c1, c5);
								c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
								c0 = _mm_or_si128(c0, c1);

								_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
							}

							source0 += pitch;
							source1 += pitch;
							source2 += pitch;
							source3 += pitch;
							source4 += pitch;
							source5 += pitch;
							source6 += pitch;
							source7 += pitch;
						}
					}
					else if(internal.samples == 16)
					{
						for(int y = 0; y < height; y++)
						{
							for(int x = 0; x < width; x += 8)
							{
								__m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x));
								__m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x));
								__m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x));
								__m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x));
								__m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x));
								__m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x));
								__m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x));
								__m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x));
								__m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x));
								__m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x));
								__m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x));
								__m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x));
								__m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x));
								__m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x));
								__m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x));
								__m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x));

								static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F};
								static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0};
								__m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
								__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_));
								__m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b));
								__m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
								__m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b));
								__m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_));
								__m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b));
								__m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_));
								__m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b));
								__m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_));
								__m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b));
								__m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_));
								__m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b));
								__m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_));
								__m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b));
								__m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_));
								__m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b));
								__m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_));
								__m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b));
								__m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_));
								__m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b));
								__m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_));
								__m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b));
								__m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_));
								__m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b));
								__m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_));
								__m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b));
								__m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_));
								__m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b));
								__m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_));
								__m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b));
								__m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_));

								c0 = _mm_avg_epu8(c0_r_b, c1_r_b);
								c2 = _mm_avg_epu8(c2_r_b, c3_r_b);
								c4 = _mm_avg_epu8(c4_r_b, c5_r_b);
								c6 = _mm_avg_epu8(c6_r_b, c7_r_b);
								c8 = _mm_avg_epu8(c8_r_b, c9_r_b);
								cA = _mm_avg_epu8(cA_r_b, cB_r_b);
								cC = _mm_avg_epu8(cC_r_b, cD_r_b);
								cE = _mm_avg_epu8(cE_r_b, cF_r_b);
								c0 = _mm_avg_epu8(c0, c2);
								c4 = _mm_avg_epu8(c4, c6);
								c8 = _mm_avg_epu8(c8, cA);
								cC = _mm_avg_epu8(cC, cE);
								c0 = _mm_avg_epu8(c0, c4);
								c8 = _mm_avg_epu8(c8, cC);
								c0 = _mm_avg_epu8(c0, c8);
								c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b));
								// Green straddles the byte boundary, so it is averaged in 16-bit
								// lanes throughout to keep the carry out of the low byte (the
								// 8-bit averages below were a copy-paste slip from the r_b path).
								c1 = _mm_avg_epu16(c0__g_, c1__g_);
								c3 = _mm_avg_epu16(c2__g_, c3__g_);
								c5 = _mm_avg_epu16(c4__g_, c5__g_);
								c7 = _mm_avg_epu16(c6__g_, c7__g_);
								c9 = _mm_avg_epu16(c8__g_, c9__g_);
								cB = _mm_avg_epu16(cA__g_, cB__g_);
								cD = _mm_avg_epu16(cC__g_, cD__g_);
								cF = _mm_avg_epu16(cE__g_, cF__g_);
								c1 = _mm_avg_epu16(c1, c3);
								c5 = _mm_avg_epu16(c5, c7);
								c9 = _mm_avg_epu16(c9, cB);
								cD = _mm_avg_epu16(cD, cF);
								c1 = _mm_avg_epu16(c1, c5);
								c9 = _mm_avg_epu16(c9, cD);
								c1 = _mm_avg_epu16(c1, c9);
								c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_));
								c0 = _mm_or_si128(c0, c1);

								_mm_store_si128((__m128i*)(source0 + 2 * x), c0);
							}

							source0 += pitch;
							source1 += pitch;
							source2 += pitch;
							source3 += pitch;
							source4 += pitch;
							source5 += pitch;
							source6 += pitch;
							source7 += pitch;
							source8 += pitch;
							source9 += pitch;
							sourceA += pitch;
							sourceB += pitch;
							sourceC += pitch;
							sourceD += pitch;
							sourceE += pitch;
							sourceF += pitch;
						}
					}
					else ASSERT(false);
				}
				else
			#endif
			{
				// Averages each 5/6/5 field of two R5G6B5 pixels with rounding:
				// (x & y) + ((x ^ y) >> 1) is the per-field floor average (0x7BEF drops the
				// bits shifted out of the red and green fields), and (x ^ y) & 0x0821 adds
				// the rounding bit of each field.
				#define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821))

				if(internal.samples == 2)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);

							c0 = AVERAGE(c0, c1);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
					}
				}
				else if(internal.samples == 4)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c0 = AVERAGE(c0, c2);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
					}
				}
				else if(internal.samples == 8)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
							unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
							unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
							unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
							unsigned short c7 = *(unsigned short*)(source7 + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c4 = AVERAGE(c4, c5);
							c6 = AVERAGE(c6, c7);
							c0 = AVERAGE(c0, c2);
							c4 = AVERAGE(c4, c6);
							c0 = AVERAGE(c0, c4);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
					}
				}
				else if(internal.samples == 16)
				{
					for(int y = 0; y < height; y++)
					{
						for(int x = 0; x < width; x++)
						{
							unsigned short c0 = *(unsigned short*)(source0 + 2 * x);
							unsigned short c1 = *(unsigned short*)(source1 + 2 * x);
							unsigned short c2 = *(unsigned short*)(source2 + 2 * x);
							unsigned short c3 = *(unsigned short*)(source3 + 2 * x);
							unsigned short c4 = *(unsigned short*)(source4 + 2 * x);
							unsigned short c5 = *(unsigned short*)(source5 + 2 * x);
							unsigned short c6 = *(unsigned short*)(source6 + 2 * x);
							unsigned short c7 = *(unsigned short*)(source7 + 2 * x);
							unsigned short c8 = *(unsigned short*)(source8 + 2 * x);
							unsigned short c9 = *(unsigned short*)(source9 + 2 * x);
							unsigned short cA = *(unsigned short*)(sourceA + 2 * x);
							unsigned short cB = *(unsigned short*)(sourceB + 2 * x);
							unsigned short cC = *(unsigned short*)(sourceC + 2 * x);
							unsigned short cD = *(unsigned short*)(sourceD + 2 * x);
							unsigned short cE = *(unsigned short*)(sourceE + 2 * x);
							unsigned short cF = *(unsigned short*)(sourceF + 2 * x);

							c0 = AVERAGE(c0, c1);
							c2 = AVERAGE(c2, c3);
							c4 = AVERAGE(c4, c5);
							c6 = AVERAGE(c6, c7);
							c8 = AVERAGE(c8, c9);
							cA = AVERAGE(cA, cB);
							cC = AVERAGE(cC, cD);
							cE = AVERAGE(cE, cF);
							c0 = AVERAGE(c0, c2);
							c4 = AVERAGE(c4, c6);
							c8 = AVERAGE(c8, cA);
							cC = AVERAGE(cC, cE);
							c0 = AVERAGE(c0, c4);
							c8 = AVERAGE(c8, cC);
							c0 = AVERAGE(c0, c8);

							*(unsigned short*)(source0 + 2 * x) = c0;
						}

						source0 += pitch;
						source1 += pitch;
						source2 += pitch;
						source3 += pitch;
						source4 += pitch;
						source5 += pitch;
						source6 += pitch;
						source7 += pitch;
						source8 += pitch;
						source9 += pitch;
						sourceA += pitch;
						sourceB += pitch;
						sourceC += pitch;
						sourceD += pitch;
						sourceE += pitch;
						sourceF += pitch;
					}
				}
				else ASSERT(false);

				#undef AVERAGE
			}
		}
		else
		{
			// UNIMPLEMENTED();
		}
	}
}