1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 Google Inc.
6 * Copyright (c) 2019 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief YCbCr Test Utilities
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktYCbCrUtil.hpp"
26
27 #include "vkQueryUtil.hpp"
28 #include "vkRefUtil.hpp"
29 #include "vkTypeUtil.hpp"
30 #include "vkCmdUtil.hpp"
31
32 #include "tcuTextureUtil.hpp"
33 #include "deMath.h"
34 #include "deFloat16.h"
35 #include "tcuVector.hpp"
36 #include "tcuVectorUtil.hpp"
37
38 #include "deSTLUtil.hpp"
39 #include "deUniquePtr.hpp"
40
41 #include <limits>
42
43 namespace vkt
44 {
45 namespace ycbcr
46 {
47
48 using namespace vk;
49
50 using de::MovePtr;
51 using tcu::FloatFormat;
52 using tcu::Interval;
53 using tcu::IVec2;
54 using tcu::IVec4;
55 using tcu::UVec2;
56 using tcu::UVec4;
57 using tcu::Vec2;
58 using tcu::Vec4;
59 using std::vector;
60 using std::string;
61
62 // MultiPlaneImageData
63
MultiPlaneImageData(VkFormat format,const UVec2 & size)64 MultiPlaneImageData::MultiPlaneImageData (VkFormat format, const UVec2& size)
65 : m_format (format)
66 , m_description (getPlanarFormatDescription(format))
67 , m_size (size)
68 {
69 for (deUint32 planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
70 m_planeData[planeNdx].resize(getPlaneSizeInBytes(m_description, size, planeNdx, 0, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY));
71 }
72
//! Copy constructor: duplicates format, description, size and the byte storage
//! of every plane in use (planes beyond numPlanes are left default-constructed).
MultiPlaneImageData::MultiPlaneImageData (const MultiPlaneImageData& other)
	: m_format		(other.m_format)
	, m_description	(other.m_description)
	, m_size		(other.m_size)
{
	const deUint32	planeCount	= m_description.numPlanes;

	for (deUint32 plane = 0u; plane != planeCount; ++plane)
	{
		m_planeData[plane] = other.m_planeData[plane];
	}
}
81
~MultiPlaneImageData(void)82 MultiPlaneImageData::~MultiPlaneImageData (void)
83 {
84 }
85
getChannelAccess(deUint32 channelNdx)86 tcu::PixelBufferAccess MultiPlaneImageData::getChannelAccess (deUint32 channelNdx)
87 {
88 void* planePtrs[PlanarFormatDescription::MAX_PLANES];
89 deUint32 planeRowPitches[PlanarFormatDescription::MAX_PLANES];
90
91 for (deUint32 planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
92 {
93 const deUint32 planeW = m_size.x() / ( m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
94 planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
95 planePtrs[planeNdx] = &m_planeData[planeNdx][0];
96 }
97
98 return vk::getChannelAccess(m_description,
99 m_size,
100 planeRowPitches,
101 planePtrs,
102 channelNdx);
103 }
104
getChannelAccess(deUint32 channelNdx) const105 tcu::ConstPixelBufferAccess MultiPlaneImageData::getChannelAccess (deUint32 channelNdx) const
106 {
107 const void* planePtrs[PlanarFormatDescription::MAX_PLANES];
108 deUint32 planeRowPitches[PlanarFormatDescription::MAX_PLANES];
109
110 for (deUint32 planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
111 {
112 const deUint32 planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
113 planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
114 planePtrs[planeNdx] = &m_planeData[planeNdx][0];
115 }
116
117 return vk::getChannelAccess(m_description,
118 m_size,
119 planeRowPitches,
120 planePtrs,
121 channelNdx);
122 }
123
124 // Misc utilities
125
126 namespace
127 {
128
allocateStagingBuffers(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,const MultiPlaneImageData & imageData,vector<VkBufferSp> * buffers,vector<AllocationSp> * allocations)129 void allocateStagingBuffers (const DeviceInterface& vkd,
130 VkDevice device,
131 Allocator& allocator,
132 const MultiPlaneImageData& imageData,
133 vector<VkBufferSp>* buffers,
134 vector<AllocationSp>* allocations)
135 {
136 for (deUint32 planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
137 {
138 const VkBufferCreateInfo bufferInfo =
139 {
140 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
141 DE_NULL,
142 (VkBufferCreateFlags)0u,
143 (VkDeviceSize)imageData.getPlaneSize(planeNdx),
144 VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT,
145 VK_SHARING_MODE_EXCLUSIVE,
146 0u,
147 (const deUint32*)DE_NULL,
148 };
149 Move<VkBuffer> buffer (createBuffer(vkd, device, &bufferInfo));
150 MovePtr<Allocation> allocation (allocator.allocate(getBufferMemoryRequirements(vkd, device, *buffer),
151 MemoryRequirement::HostVisible|MemoryRequirement::Any));
152
153 VK_CHECK(vkd.bindBufferMemory(device, *buffer, allocation->getMemory(), allocation->getOffset()));
154
155 buffers->push_back(VkBufferSp(new Unique<VkBuffer>(buffer)));
156 allocations->push_back(AllocationSp(allocation.release()));
157 }
158 }
159
allocateAndWriteStagingBuffers(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,const MultiPlaneImageData & imageData,vector<VkBufferSp> * buffers,vector<AllocationSp> * allocations)160 void allocateAndWriteStagingBuffers (const DeviceInterface& vkd,
161 VkDevice device,
162 Allocator& allocator,
163 const MultiPlaneImageData& imageData,
164 vector<VkBufferSp>* buffers,
165 vector<AllocationSp>* allocations)
166 {
167 allocateStagingBuffers(vkd, device, allocator, imageData, buffers, allocations);
168
169 for (deUint32 planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
170 {
171 deMemcpy((*allocations)[planeNdx]->getHostPtr(), imageData.getPlanePtr(planeNdx), imageData.getPlaneSize(planeNdx));
172 flushMappedMemoryRange(vkd, device, (*allocations)[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
173 }
174 }
175
readStagingBuffers(MultiPlaneImageData * imageData,const DeviceInterface & vkd,VkDevice device,const vector<AllocationSp> & allocations)176 void readStagingBuffers (MultiPlaneImageData* imageData,
177 const DeviceInterface& vkd,
178 VkDevice device,
179 const vector<AllocationSp>& allocations)
180 {
181 for (deUint32 planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
182 {
183 invalidateMappedMemoryRange(vkd, device, allocations[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
184 deMemcpy(imageData->getPlanePtr(planeNdx), allocations[planeNdx]->getHostPtr(), imageData->getPlaneSize(planeNdx));
185 }
186 }
187
188 } // anonymous
189
checkImageSupport(Context & context,VkFormat format,VkImageCreateFlags createFlags,VkImageTiling tiling)190 void checkImageSupport (Context& context, VkFormat format, VkImageCreateFlags createFlags, VkImageTiling tiling)
191 {
192 const bool disjoint = (createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0;
193 const VkPhysicalDeviceSamplerYcbcrConversionFeatures features = context.getSamplerYcbcrConversionFeatures();
194
195 if (features.samplerYcbcrConversion == VK_FALSE)
196 TCU_THROW(NotSupportedError, "samplerYcbcrConversion is not supported");
197
198 if (disjoint)
199 {
200 context.requireDeviceFunctionality("VK_KHR_bind_memory2");
201 context.requireDeviceFunctionality("VK_KHR_get_memory_requirements2");
202 }
203
204 {
205 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
206 context.getPhysicalDevice(),
207 format);
208 const VkFormatFeatureFlags featureFlags = tiling == VK_IMAGE_TILING_OPTIMAL
209 ? formatProperties.optimalTilingFeatures
210 : formatProperties.linearTilingFeatures;
211
212 if ((featureFlags & (VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)) == 0)
213 TCU_THROW(NotSupportedError, "YCbCr conversion is not supported for format");
214
215 if (disjoint && ((featureFlags & VK_FORMAT_FEATURE_DISJOINT_BIT) == 0))
216 TCU_THROW(NotSupportedError, "Disjoint planes are not supported for format");
217 }
218 }
219
fillRandomNoNaN(de::Random * randomGen,deUint8 * const data,deUint32 size,const vk::VkFormat format)220 void fillRandomNoNaN(de::Random* randomGen, deUint8* const data, deUint32 size, const vk::VkFormat format)
221 {
222 bool isFloat = false;
223 deUint32 stride = 1;
224
225 switch (format)
226 {
227 case vk::VK_FORMAT_B10G11R11_UFLOAT_PACK32:
228 isFloat = true;
229 stride = 1;
230 break;
231 case vk::VK_FORMAT_R16_SFLOAT:
232 case vk::VK_FORMAT_R16G16_SFLOAT:
233 case vk::VK_FORMAT_R16G16B16_SFLOAT:
234 case vk::VK_FORMAT_R16G16B16A16_SFLOAT:
235 isFloat = true;
236 stride = 2;
237 break;
238 case vk::VK_FORMAT_R32_SFLOAT:
239 case vk::VK_FORMAT_R32G32_SFLOAT:
240 case vk::VK_FORMAT_R32G32B32_SFLOAT:
241 case vk::VK_FORMAT_R32G32B32A32_SFLOAT:
242 isFloat = true;
243 stride = 4;
244 break;
245 case vk::VK_FORMAT_R64_SFLOAT:
246 case vk::VK_FORMAT_R64G64_SFLOAT:
247 case vk::VK_FORMAT_R64G64B64_SFLOAT:
248 case vk::VK_FORMAT_R64G64B64A64_SFLOAT:
249 isFloat = true;
250 stride = 8;
251 break;
252 default:
253 stride = 1;
254 break;
255 }
256
257 if (isFloat) {
258 deUint32 ndx = 0;
259 for (; ndx < size - stride + 1; ndx += stride)
260 {
261 if (stride == 1) {
262 // Set first bit of each channel to 0 to avoid NaNs, only format is B10G11R11
263 const deUint8 mask[] = { 0x7F, 0xDF, 0xFB, 0xFF };
264 // Apply mask for both endians
265 data[ndx] = (randomGen->getUint8() & mask[ndx % 4]) & mask[3 - ndx % 4];
266 }
267 else if (stride == 2)
268 {
269 deFloat16* ptr = reinterpret_cast<deFloat16*>(&data[ndx]);
270 *ptr = deFloat32To16(randomGen->getFloat());
271 }
272 else if (stride == 4)
273 {
274 float* ptr = reinterpret_cast<float*>(&data[ndx]);
275 *ptr = randomGen->getFloat();
276 }
277 else if (stride == 8)
278 {
279 double* ptr = reinterpret_cast<double*>(&data[ndx]);
280 *ptr = randomGen->getDouble();
281 }
282 }
283 while (ndx < size) {
284 data[ndx] = 0;
285 }
286 }
287 else
288 {
289 for (deUint32 ndx = 0; ndx < size; ++ndx)
290 {
291 data[ndx] = randomGen->getUint8();
292 }
293 }
294 }
295
296 // When noNan is true, fillRandom does not generate NaNs in float formats.
fillRandom(de::Random * randomGen,MultiPlaneImageData * imageData,const vk::VkFormat format,const bool noNan)297 void fillRandom (de::Random* randomGen, MultiPlaneImageData* imageData, const vk::VkFormat format, const bool noNan)
298 {
299 for (deUint32 planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
300 {
301 const size_t planeSize = imageData->getPlaneSize(planeNdx);
302 deUint8* const planePtr = (deUint8*)imageData->getPlanePtr(planeNdx);
303
304 if (noNan) {
305 fillRandomNoNaN(randomGen, planePtr, (deUint32)planeSize, format);
306 }
307 else
308 {
309 for (size_t ndx = 0; ndx < planeSize; ++ndx)
310 {
311 planePtr[ndx] = randomGen->getUint8();
312 }
313 }
314 }
315 }
316
fillGradient(MultiPlaneImageData * imageData,const tcu::Vec4 & minVal,const tcu::Vec4 & maxVal)317 void fillGradient (MultiPlaneImageData* imageData, const tcu::Vec4& minVal, const tcu::Vec4& maxVal)
318 {
319 const PlanarFormatDescription& formatInfo = imageData->getDescription();
320
321 // \todo [pyry] Optimize: no point in re-rendering source gradient for each channel.
322
323 for (deUint32 channelNdx = 0; channelNdx < 4; channelNdx++)
324 {
325 if (formatInfo.hasChannelNdx(channelNdx))
326 {
327 const tcu::PixelBufferAccess channelAccess = imageData->getChannelAccess(channelNdx);
328 tcu::TextureLevel tmpTexture (tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT), channelAccess.getWidth(), channelAccess.getHeight());
329 const tcu::ConstPixelBufferAccess tmpAccess = tmpTexture.getAccess();
330
331 tcu::fillWithComponentGradients(tmpTexture, minVal, maxVal);
332
333 for (int y = 0; y < channelAccess.getHeight(); ++y)
334 for (int x = 0; x < channelAccess.getWidth(); ++x)
335 {
336 channelAccess.setPixel(tcu::Vec4(tmpAccess.getPixel(x, y)[channelNdx]), x, y);
337 }
338 }
339 }
340 }
341
fillZero(MultiPlaneImageData * imageData)342 void fillZero (MultiPlaneImageData* imageData)
343 {
344 for (deUint32 planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
345 deMemset(imageData->getPlanePtr(planeNdx), 0, imageData->getPlaneSize(planeNdx));
346 }
347
allocateAndBindImageMemory(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,VkImage image,VkFormat format,VkImageCreateFlags createFlags,vk::MemoryRequirement requirement)348 vector<AllocationSp> allocateAndBindImageMemory (const DeviceInterface& vkd,
349 VkDevice device,
350 Allocator& allocator,
351 VkImage image,
352 VkFormat format,
353 VkImageCreateFlags createFlags,
354 vk::MemoryRequirement requirement)
355 {
356 vector<AllocationSp> allocations;
357
358 if ((createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0)
359 {
360 const deUint32 numPlanes = getPlaneCount(format);
361
362 bindImagePlanesMemory(vkd, device, image, numPlanes, allocations, allocator, requirement);
363 }
364 else
365 {
366 const VkMemoryRequirements reqs = getImageMemoryRequirements(vkd, device, image);
367
368 allocations.push_back(AllocationSp(allocator.allocate(reqs, requirement).release()));
369
370 VK_CHECK(vkd.bindImageMemory(device, image, allocations.back()->getMemory(), allocations.back()->getOffset()));
371 }
372
373 return allocations;
374 }
375
uploadImage(const DeviceInterface & vkd,VkDevice device,deUint32 queueFamilyNdx,Allocator & allocator,VkImage image,const MultiPlaneImageData & imageData,VkAccessFlags nextAccess,VkImageLayout finalLayout,deUint32 arrayLayer)376 void uploadImage (const DeviceInterface& vkd,
377 VkDevice device,
378 deUint32 queueFamilyNdx,
379 Allocator& allocator,
380 VkImage image,
381 const MultiPlaneImageData& imageData,
382 VkAccessFlags nextAccess,
383 VkImageLayout finalLayout,
384 deUint32 arrayLayer)
385 {
386 const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
387 const Unique<VkCommandPool> cmdPool (createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
388 const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
389 vector<VkBufferSp> stagingBuffers;
390 vector<AllocationSp> stagingMemory;
391
392 const PlanarFormatDescription& formatDesc = imageData.getDescription();
393
394 allocateAndWriteStagingBuffers(vkd, device, allocator, imageData, &stagingBuffers, &stagingMemory);
395
396 beginCommandBuffer(vkd, *cmdBuffer);
397
398 for (deUint32 planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
399 {
400 const VkImageAspectFlagBits aspect = (formatDesc.numPlanes > 1)
401 ? getPlaneAspect(planeNdx)
402 : VK_IMAGE_ASPECT_COLOR_BIT;
403 const VkExtent3D imageExtent = makeExtent3D(imageData.getSize().x(), imageData.getSize().y(), 1u);
404 const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
405 const VkBufferImageCopy copy =
406 {
407 0u, // bufferOffset
408 0u, // bufferRowLength
409 0u, // bufferImageHeight
410 { (VkImageAspectFlags)aspect, 0u, arrayLayer, 1u },
411 makeOffset3D(0u, 0u, 0u),
412 planeExtent
413 };
414
415 {
416 const VkImageMemoryBarrier preCopyBarrier =
417 {
418 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
419 DE_NULL,
420 (VkAccessFlags)0,
421 VK_ACCESS_TRANSFER_WRITE_BIT,
422 VK_IMAGE_LAYOUT_UNDEFINED,
423 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
424 VK_QUEUE_FAMILY_IGNORED,
425 VK_QUEUE_FAMILY_IGNORED,
426 image,
427 { (VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u }
428 };
429
430 vkd.cmdPipelineBarrier(*cmdBuffer,
431 (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
432 (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
433 (VkDependencyFlags)0u,
434 0u,
435 (const VkMemoryBarrier*)DE_NULL,
436 0u,
437 (const VkBufferMemoryBarrier*)DE_NULL,
438 1u,
439 &preCopyBarrier);
440 }
441
442 vkd.cmdCopyBufferToImage(*cmdBuffer, **stagingBuffers[planeNdx], image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, ©);
443
444 {
445 const VkImageMemoryBarrier postCopyBarrier =
446 {
447 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
448 DE_NULL,
449 VK_ACCESS_TRANSFER_WRITE_BIT,
450 nextAccess,
451 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
452 finalLayout,
453 VK_QUEUE_FAMILY_IGNORED,
454 VK_QUEUE_FAMILY_IGNORED,
455 image,
456 { (VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u }
457 };
458
459 vkd.cmdPipelineBarrier(*cmdBuffer,
460 (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
461 (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
462 (VkDependencyFlags)0u,
463 0u,
464 (const VkMemoryBarrier*)DE_NULL,
465 0u,
466 (const VkBufferMemoryBarrier*)DE_NULL,
467 1u,
468 &postCopyBarrier);
469 }
470
471 }
472
473 endCommandBuffer(vkd, *cmdBuffer);
474
475 submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
476 }
477
fillImageMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,deUint32 queueFamilyNdx,vk::VkImage image,const std::vector<de::SharedPtr<vk::Allocation>> & allocations,const MultiPlaneImageData & imageData,vk::VkAccessFlags nextAccess,vk::VkImageLayout finalLayout,deUint32 arrayLayer)478 void fillImageMemory (const vk::DeviceInterface& vkd,
479 vk::VkDevice device,
480 deUint32 queueFamilyNdx,
481 vk::VkImage image,
482 const std::vector<de::SharedPtr<vk::Allocation> >& allocations,
483 const MultiPlaneImageData& imageData,
484 vk::VkAccessFlags nextAccess,
485 vk::VkImageLayout finalLayout,
486 deUint32 arrayLayer)
487 {
488 const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
489 const Unique<VkCommandPool> cmdPool (createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
490 const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
491 const PlanarFormatDescription& formatDesc = imageData.getDescription();
492
493 for (deUint32 planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
494 {
495 const VkImageAspectFlagBits aspect = (formatDesc.numPlanes > 1)
496 ? getPlaneAspect(planeNdx)
497 : VK_IMAGE_ASPECT_COLOR_BIT;
498 const de::SharedPtr<Allocation>& allocation = allocations.size() > 1
499 ? allocations[planeNdx]
500 : allocations[0];
501 const size_t planeSize = imageData.getPlaneSize(planeNdx);
502 const deUint32 planeH = imageData.getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
503 const VkImageSubresource subresource =
504 {
505 static_cast<vk::VkImageAspectFlags>(aspect),
506 0u,
507 arrayLayer,
508 };
509 VkSubresourceLayout layout;
510
511 vkd.getImageSubresourceLayout(device, image, &subresource, &layout);
512
513 for (deUint32 row = 0; row < planeH; ++row)
514 {
515 const size_t rowSize = planeSize / planeH;
516 void* const dstPtr = ((deUint8*)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
517 const void* const srcPtr = ((const deUint8*)imageData.getPlanePtr(planeNdx)) + row * rowSize;
518
519 deMemcpy(dstPtr, srcPtr, rowSize);
520 }
521 flushMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
522 }
523
524 beginCommandBuffer(vkd, *cmdBuffer);
525
526 {
527 const VkImageMemoryBarrier postCopyBarrier =
528 {
529 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
530 DE_NULL,
531 0u,
532 nextAccess,
533 VK_IMAGE_LAYOUT_PREINITIALIZED,
534 finalLayout,
535 VK_QUEUE_FAMILY_IGNORED,
536 VK_QUEUE_FAMILY_IGNORED,
537 image,
538 { VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, arrayLayer, 1u }
539 };
540
541 vkd.cmdPipelineBarrier(*cmdBuffer,
542 (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
543 (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
544 (VkDependencyFlags)0u,
545 0u,
546 (const VkMemoryBarrier*)DE_NULL,
547 0u,
548 (const VkBufferMemoryBarrier*)DE_NULL,
549 1u,
550 &postCopyBarrier);
551 }
552
553 endCommandBuffer(vkd, *cmdBuffer);
554
555 submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
556 }
557
downloadImage(const DeviceInterface & vkd,VkDevice device,deUint32 queueFamilyNdx,Allocator & allocator,VkImage image,MultiPlaneImageData * imageData,VkAccessFlags prevAccess,VkImageLayout initialLayout,uint32_t baseArrayLayer)558 void downloadImage (const DeviceInterface& vkd,
559 VkDevice device,
560 deUint32 queueFamilyNdx,
561 Allocator& allocator,
562 VkImage image,
563 MultiPlaneImageData* imageData,
564 VkAccessFlags prevAccess,
565 VkImageLayout initialLayout,
566 uint32_t baseArrayLayer)
567 {
568 const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
569 const Unique<VkCommandPool> cmdPool (createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
570 const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
571 vector<VkBufferSp> stagingBuffers;
572 vector<AllocationSp> stagingMemory;
573
574 const PlanarFormatDescription& formatDesc = imageData->getDescription();
575
576 allocateStagingBuffers(vkd, device, allocator, *imageData, &stagingBuffers, &stagingMemory);
577
578 beginCommandBuffer(vkd, *cmdBuffer);
579
580 for (deUint32 planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
581 {
582 const VkImageAspectFlagBits aspect = (formatDesc.numPlanes > 1)
583 ? getPlaneAspect(planeNdx)
584 : VK_IMAGE_ASPECT_COLOR_BIT;
585 {
586 const VkImageMemoryBarrier preCopyBarrier =
587 {
588 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
589 DE_NULL,
590 prevAccess,
591 VK_ACCESS_TRANSFER_READ_BIT,
592 initialLayout,
593 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
594 VK_QUEUE_FAMILY_IGNORED,
595 VK_QUEUE_FAMILY_IGNORED,
596 image,
597 {
598 static_cast<vk::VkImageAspectFlags>(aspect),
599 0u,
600 1u,
601 baseArrayLayer,
602 1u
603 }
604 };
605
606 vkd.cmdPipelineBarrier(*cmdBuffer,
607 (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
608 (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
609 (VkDependencyFlags)0u,
610 0u,
611 (const VkMemoryBarrier*)DE_NULL,
612 0u,
613 (const VkBufferMemoryBarrier*)DE_NULL,
614 1u,
615 &preCopyBarrier);
616 }
617 {
618 const VkExtent3D imageExtent = makeExtent3D(imageData->getSize().x(), imageData->getSize().y(), 1u);
619 const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
620 const VkBufferImageCopy copy =
621 {
622 0u, // bufferOffset
623 0u, // bufferRowLength
624 0u, // bufferImageHeight
625 { (VkImageAspectFlags)aspect, 0u, baseArrayLayer, 1u },
626 makeOffset3D(0u, 0u, 0u),
627 planeExtent
628 };
629
630 vkd.cmdCopyImageToBuffer(*cmdBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **stagingBuffers[planeNdx], 1u, ©);
631 }
632 {
633 const VkBufferMemoryBarrier postCopyBarrier =
634 {
635 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
636 DE_NULL,
637 VK_ACCESS_TRANSFER_WRITE_BIT,
638 VK_ACCESS_HOST_READ_BIT,
639 VK_QUEUE_FAMILY_IGNORED,
640 VK_QUEUE_FAMILY_IGNORED,
641 **stagingBuffers[planeNdx],
642 0u,
643 VK_WHOLE_SIZE
644 };
645
646 vkd.cmdPipelineBarrier(*cmdBuffer,
647 (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
648 (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
649 (VkDependencyFlags)0u,
650 0u,
651 (const VkMemoryBarrier*)DE_NULL,
652 1u,
653 &postCopyBarrier,
654 0u,
655 (const VkImageMemoryBarrier*)DE_NULL);
656 }
657 }
658
659 endCommandBuffer(vkd, *cmdBuffer);
660
661 submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
662
663 readStagingBuffers(imageData, vkd, device, stagingMemory);
664 }
665
readImageMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,deUint32 queueFamilyNdx,vk::VkImage image,const std::vector<de::SharedPtr<vk::Allocation>> & allocations,MultiPlaneImageData * imageData,vk::VkAccessFlags prevAccess,vk::VkImageLayout initialLayout)666 void readImageMemory (const vk::DeviceInterface& vkd,
667 vk::VkDevice device,
668 deUint32 queueFamilyNdx,
669 vk::VkImage image,
670 const std::vector<de::SharedPtr<vk::Allocation> >& allocations,
671 MultiPlaneImageData* imageData,
672 vk::VkAccessFlags prevAccess,
673 vk::VkImageLayout initialLayout)
674 {
675 const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
676 const Unique<VkCommandPool> cmdPool (createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
677 const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
678 const PlanarFormatDescription& formatDesc = imageData->getDescription();
679
680 beginCommandBuffer(vkd, *cmdBuffer);
681
682 {
683 const VkImageMemoryBarrier preCopyBarrier =
684 {
685 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
686 DE_NULL,
687 prevAccess,
688 vk::VK_ACCESS_HOST_READ_BIT,
689 initialLayout,
690 VK_IMAGE_LAYOUT_GENERAL,
691 VK_QUEUE_FAMILY_IGNORED,
692 VK_QUEUE_FAMILY_IGNORED,
693 image,
694 { VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u }
695 };
696
697 vkd.cmdPipelineBarrier(*cmdBuffer,
698 (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
699 (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
700 (VkDependencyFlags)0u,
701 0u,
702 (const VkMemoryBarrier*)DE_NULL,
703 0u,
704 (const VkBufferMemoryBarrier*)DE_NULL,
705 1u,
706 &preCopyBarrier);
707 }
708
709 endCommandBuffer(vkd, *cmdBuffer);
710
711 submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
712
713 for (deUint32 planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
714 {
715 const VkImageAspectFlagBits aspect = (formatDesc.numPlanes > 1)
716 ? getPlaneAspect(planeNdx)
717 : VK_IMAGE_ASPECT_COLOR_BIT;
718 const de::SharedPtr<Allocation>& allocation = allocations.size() > 1
719 ? allocations[planeNdx]
720 : allocations[0];
721 const size_t planeSize = imageData->getPlaneSize(planeNdx);
722 const deUint32 planeH = imageData->getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
723 const VkImageSubresource subresource =
724 {
725 static_cast<vk::VkImageAspectFlags>(aspect),
726 0u,
727 0u,
728 };
729 VkSubresourceLayout layout;
730
731 vkd.getImageSubresourceLayout(device, image, &subresource, &layout);
732
733 invalidateMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
734
735 for (deUint32 row = 0; row < planeH; ++row)
736 {
737 const size_t rowSize = planeSize / planeH;
738 const void* const srcPtr = ((const deUint8*)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
739 void* const dstPtr = ((deUint8*)imageData->getPlanePtr(planeNdx)) + row * rowSize;
740
741 deMemcpy(dstPtr, srcPtr, rowSize);
742 }
743 }
744 }
745
746 // ChannelAccess utilities
747 namespace
748 {
749
750 //! Extend < 32b signed integer to 32b
signExtend(deUint32 src,int bits)751 inline deInt32 signExtend (deUint32 src, int bits)
752 {
753 const deUint32 signBit = 1u << (bits-1);
754
755 src |= ~((src & signBit) - 1);
756
757 return (deInt32)src;
758 }
759
divRoundUp(deUint32 a,deUint32 b)760 deUint32 divRoundUp (deUint32 a, deUint32 b)
761 {
762 if (a % b == 0)
763 return a / b;
764 else
765 return (a / b) + 1;
766 }
767
768 // \todo Taken from tcuTexture.cpp
769 // \todo [2011-09-21 pyry] Move to tcutil?
770 template <typename T>
convertSatRte(float f)771 inline T convertSatRte (float f)
772 {
773 // \note Doesn't work for 64-bit types
774 DE_STATIC_ASSERT(sizeof(T) < sizeof(deUint64));
775 DE_STATIC_ASSERT((-3 % 2 != 0) && (-4 % 2 == 0));
776
777 deInt64 minVal = std::numeric_limits<T>::min();
778 deInt64 maxVal = std::numeric_limits<T>::max();
779 float q = deFloatFrac(f);
780 deInt64 intVal = (deInt64)(f-q);
781
782 // Rounding.
783 if (q == 0.5f)
784 {
785 if (intVal % 2 != 0)
786 intVal++;
787 }
788 else if (q > 0.5f)
789 intVal++;
790 // else Don't add anything
791
792 // Saturate.
793 intVal = de::max(minVal, de::min(maxVal, intVal));
794
795 return (T)intVal;
796 }
797
798 } // anonymous
799
ChannelAccess(tcu::TextureChannelClass channelClass,deUint8 channelSize,const tcu::IVec3 & size,const tcu::IVec3 & bitPitch,void * data,deUint32 bitOffset)800 ChannelAccess::ChannelAccess (tcu::TextureChannelClass channelClass,
801 deUint8 channelSize,
802 const tcu::IVec3& size,
803 const tcu::IVec3& bitPitch,
804 void* data,
805 deUint32 bitOffset)
806 : m_channelClass (channelClass)
807 , m_channelSize (channelSize)
808 , m_size (size)
809 , m_bitPitch (bitPitch)
810 , m_data ((deUint8*)data + (bitOffset / 8))
811 , m_bitOffset (bitOffset % 8)
812 {
813 }
814
getChannelUint(const tcu::IVec3 & pos) const815 deUint32 ChannelAccess::getChannelUint (const tcu::IVec3& pos) const
816 {
817 DE_ASSERT(pos[0] < m_size[0]);
818 DE_ASSERT(pos[1] < m_size[1]);
819 DE_ASSERT(pos[2] < m_size[2]);
820
821 const deInt32 bitOffset (m_bitOffset + tcu::dot(m_bitPitch, pos));
822 const deUint8* const firstByte = ((const deUint8*)m_data) + (bitOffset / 8);
823 const deUint32 byteCount = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
824 const deUint32 mask (m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
825 const deUint32 offset = bitOffset % 8;
826 deUint32 bits = 0u;
827
828 deMemcpy(&bits, firstByte, byteCount);
829
830 return (bits >> offset) & mask;
831 }
832
setChannel(const tcu::IVec3 & pos,deUint32 x)833 void ChannelAccess::setChannel (const tcu::IVec3& pos, deUint32 x)
834 {
835 DE_ASSERT(pos[0] < m_size[0]);
836 DE_ASSERT(pos[1] < m_size[1]);
837 DE_ASSERT(pos[2] < m_size[2]);
838
839 const deInt32 bitOffset (m_bitOffset + tcu::dot(m_bitPitch, pos));
840 deUint8* const firstByte = ((deUint8*)m_data) + (bitOffset / 8);
841 const deUint32 byteCount = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
842 const deUint32 mask (m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
843 const deUint32 offset = bitOffset % 8;
844
845 const deUint32 bits = (x & mask) << offset;
846 deUint32 oldBits = 0;
847
848 deMemcpy(&oldBits, firstByte, byteCount);
849
850 {
851 const deUint32 newBits = bits | (oldBits & (~(mask << offset)));
852
853 deMemcpy(firstByte, &newBits, byteCount);
854 }
855 }
856
getChannel(const tcu::IVec3 & pos) const857 float ChannelAccess::getChannel (const tcu::IVec3& pos) const
858 {
859 const deUint32 bits (getChannelUint(pos));
860
861 switch (m_channelClass)
862 {
863 case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
864 return (float)bits / (float)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u));
865
866 case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
867 return (float)bits;
868
869 case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
870 return de::max(-1.0f, (float)signExtend(bits, m_channelSize) / (float)((0x1u << (m_channelSize - 1u)) - 1u));
871
872 case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
873 return (float)signExtend(bits, m_channelSize);
874
875 case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
876 if (m_channelSize == 32)
877 return tcu::Float32(bits).asFloat();
878 else
879 {
880 DE_FATAL("Float type not supported");
881 return -1.0f;
882 }
883
884 default:
885 DE_FATAL("Unknown texture channel class");
886 return -1.0f;
887 }
888 }
889
//! Read the channel at pos as an interval, rounding through conversionFormat.
//!
//! Mirrors the float overload, but every intermediate value and the final
//! result are passed through conversionFormat.roundOut(). For signed fixed
//! point both interval bounds are clamped to at least -1. Unsupported cases
//! hit DE_FATAL and return an empty tcu::Interval().
tcu::Interval ChannelAccess::getChannel (const tcu::FloatFormat&	conversionFormat,
										 const tcu::IVec3&			pos) const
{
	const deUint32	raw	(getChannelUint(pos));

	if (m_channelClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
	{
		const deUint32		maxRaw		= (m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u));
		const tcu::Interval	numerator	(conversionFormat.roundOut((double)raw, false));
		const tcu::Interval	denominator	(conversionFormat.roundOut((double)maxRaw, false));

		return conversionFormat.roundOut(numerator / denominator, false);
	}

	if (m_channelClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER)
		return conversionFormat.roundOut((double)raw, false);

	if (m_channelClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
	{
		const deUint32		maxPositive	= (0x1u << (m_channelSize - 1u)) - 1u;
		const tcu::Interval	numerator	(conversionFormat.roundOut((double)signExtend(raw, m_channelSize), false));
		const tcu::Interval	denominator	(conversionFormat.roundOut((double)maxPositive, false));
		const tcu::Interval	result		(conversionFormat.roundOut(numerator / denominator, false));

		// The most negative raw value would map slightly below -1; clamp both bounds.
		return tcu::Interval(de::max(-1.0, result.lo()), de::max(-1.0, result.hi()));
	}

	if (m_channelClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER)
		return conversionFormat.roundOut((double)signExtend(raw, m_channelSize), false);

	if (m_channelClass == tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
	{
		if (m_channelSize == 32)
			return conversionFormat.roundOut(tcu::Float32(raw).asFloat(), false);

		DE_FATAL("Float type not supported");
		return tcu::Interval();
	}

	DE_FATAL("Unknown texture channel class");
	return tcu::Interval();
}
929
setChannel(const tcu::IVec3 & pos,float x)930 void ChannelAccess::setChannel (const tcu::IVec3& pos, float x)
931 {
932 DE_ASSERT(pos[0] < m_size[0]);
933 DE_ASSERT(pos[1] < m_size[1]);
934 DE_ASSERT(pos[2] < m_size[2]);
935
936 const deUint32 mask (m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
937
938 switch (m_channelClass)
939 {
940 case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
941 {
942 const deUint32 maxValue (mask);
943 const deUint32 value (de::min(maxValue, (deUint32)convertSatRte<deUint32>(x * (float)maxValue)));
944 setChannel(pos, value);
945 break;
946 }
947
948 case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
949 {
950 const deInt32 range ((0x1u << (m_channelSize - 1u)) - 1u);
951 const deUint32 value ((deUint32)de::clamp<deInt32>(convertSatRte<deInt32>(x * (float)range), -range, range));
952 setChannel(pos, value);
953 break;
954 }
955
956 case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
957 {
958 const deUint32 maxValue (mask);
959 const deUint32 value (de::min(maxValue, (deUint32)x));
960 setChannel(pos, value);
961 break;
962 }
963
964 case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
965 {
966 const deInt32 minValue (-(deInt32)(1u << (m_channelSize - 1u)));
967 const deInt32 maxValue ((deInt32)((1u << (m_channelSize - 1u)) - 1u));
968 const deUint32 value ((deUint32)de::clamp((deInt32)x, minValue, maxValue));
969 setChannel(pos, value);
970 break;
971 }
972
973 case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
974 {
975 if (m_channelSize == 32)
976 {
977 const deUint32 value = tcu::Float32(x).bits();
978 setChannel(pos, value);
979 }
980 else
981 DE_FATAL("Float type not supported");
982 break;
983 }
984
985 default:
986 DE_FATAL("Unknown texture channel class");
987 }
988 }
989
getChannelAccess(MultiPlaneImageData & data,const vk::PlanarFormatDescription & formatInfo,const UVec2 & size,int channelNdx)990 ChannelAccess getChannelAccess (MultiPlaneImageData& data,
991 const vk::PlanarFormatDescription& formatInfo,
992 const UVec2& size,
993 int channelNdx)
994 {
995 DE_ASSERT(formatInfo.hasChannelNdx(channelNdx));
996
997 const deUint32 planeNdx = formatInfo.channels[channelNdx].planeNdx;
998 const deUint32 valueOffsetBits = formatInfo.channels[channelNdx].offsetBits;
999 const deUint32 pixelStrideBytes = formatInfo.channels[channelNdx].strideBytes;
1000 const deUint32 pixelStrideBits = pixelStrideBytes * 8;
1001 const deUint8 sizeBits = formatInfo.channels[channelNdx].sizeBits;
1002
1003 DE_ASSERT(size.x() % (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor) == 0);
1004 DE_ASSERT(size.y() % (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor) == 0);
1005
1006 deUint32 accessWidth = size.x() / ( formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor );
1007 const deUint32 accessHeight = size.y() / ( formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor );
1008 const deUint32 elementSizeBytes = formatInfo.planes[planeNdx].elementSizeBytes;
1009 const deUint32 rowPitch = formatInfo.planes[planeNdx].elementSizeBytes * accessWidth;
1010 const deUint32 rowPitchBits = rowPitch * 8;
1011
1012 if (pixelStrideBytes != elementSizeBytes)
1013 {
1014 DE_ASSERT(elementSizeBytes % pixelStrideBytes == 0);
1015 accessWidth *= elementSizeBytes/pixelStrideBytes;
1016 }
1017
1018 return ChannelAccess((tcu::TextureChannelClass)formatInfo.channels[channelNdx].type, sizeBits, tcu::IVec3(accessWidth, accessHeight, 1u), tcu::IVec3((int)pixelStrideBits, (int)rowPitchBits, 0), data.getPlanePtr(planeNdx), (deUint32)valueOffsetBits);
1019 }
1020
isXChromaSubsampled(vk::VkFormat format)1021 bool isXChromaSubsampled (vk::VkFormat format)
1022 {
1023 switch (format)
1024 {
1025 case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1026 case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1027 case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1028 case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1029 case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1030 case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1031 case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1032 case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1033 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1034 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1035 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1036 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1037 case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1038 case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1039 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1040 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1041 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1042 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1043 case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1044 case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1045 case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1046 case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1047 case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1048 case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1049 return true;
1050
1051 default:
1052 return false;
1053 }
1054 }
1055
isYChromaSubsampled(vk::VkFormat format)1056 bool isYChromaSubsampled (vk::VkFormat format)
1057 {
1058 switch (format)
1059 {
1060 case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1061 case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1062 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1063 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1064 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1065 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1066 case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1067 case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1068 return true;
1069
1070 default:
1071 return false;
1072 }
1073 }
1074
areLsb6BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)1075 bool areLsb6BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1076 {
1077 if ((srcFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) ||
1078 (dstFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) ||
1079 (srcFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
1080 (dstFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
1081 (srcFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
1082 (dstFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
1083 (srcFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
1084 (dstFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
1085 (srcFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
1086 (dstFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
1087 (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
1088 (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
1089 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
1090 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
1091 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
1092 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
1093 (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
1094 (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
1095 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16) ||
1096 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16))
1097 {
1098 return true;
1099 }
1100
1101 return false;
1102 }
1103
areLsb4BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)1104 bool areLsb4BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1105 {
1106 if ((srcFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) ||
1107 (dstFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) ||
1108 (srcFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1109 (dstFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1110 (srcFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1111 (dstFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1112 (srcFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1113 (dstFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1114 (srcFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1115 (dstFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1116 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1117 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1118 (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1119 (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1120 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1121 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1122 (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1123 (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1124 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16) ||
1125 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16))
1126 {
1127 return true;
1128 }
1129
1130 return false;
1131 }
1132
1133 // \note Used for range expansion
getYCbCrBitDepth(vk::VkFormat format)1134 tcu::UVec4 getYCbCrBitDepth (vk::VkFormat format)
1135 {
1136 switch (format)
1137 {
1138 case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1139 case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1140 case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1141 case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1142 case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1143 case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1144 case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
1145 case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
1146 return tcu::UVec4(8, 8, 8, 0);
1147
1148 case vk::VK_FORMAT_R10X6_UNORM_PACK16:
1149 return tcu::UVec4(10, 0, 0, 0);
1150
1151 case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
1152 return tcu::UVec4(10, 10, 0, 0);
1153
1154 case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
1155 return tcu::UVec4(10, 10, 10, 10);
1156
1157 case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1158 case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1159 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1160 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1161 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1162 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1163 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
1164 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
1165 return tcu::UVec4(10, 10, 10, 0);
1166
1167 case vk::VK_FORMAT_R12X4_UNORM_PACK16:
1168 return tcu::UVec4(12, 0, 0, 0);
1169
1170 case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
1171 return tcu::UVec4(12, 12, 0, 0);
1172
1173 case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
1174 case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1175 case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1176 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1177 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1178 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1179 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1180 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
1181 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
1182 return tcu::UVec4(12, 12, 12, 12);
1183
1184 case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1185 case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1186 case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1187 case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1188 case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1189 case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1190 case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
1191 case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
1192 return tcu::UVec4(16, 16, 16, 0);
1193
1194 default:
1195 return tcu::getTextureFormatBitDepth(vk::mapVkFormat(format)).cast<deUint32>();
1196 }
1197 }
1198
getPrecision(VkFormat format)1199 std::vector<tcu::FloatFormat> getPrecision (VkFormat format)
1200 {
1201 std::vector<FloatFormat> floatFormats;
1202 UVec4 channelDepth = getYCbCrBitDepth (format);
1203
1204 for (deUint32 channelIdx = 0; channelIdx < 4; channelIdx++)
1205 floatFormats.push_back(tcu::FloatFormat(0, 0, channelDepth[channelIdx], false, tcu::YES));
1206
1207 return floatFormats;
1208 }
1209
getYCbCrFormatChannelCount(vk::VkFormat format)1210 deUint32 getYCbCrFormatChannelCount (vk::VkFormat format)
1211 {
1212 switch (format)
1213 {
1214 case vk::VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1215 case vk::VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1216 case vk::VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1217 case vk::VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1218 case vk::VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1219 case vk::VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1220 case vk::VK_FORMAT_B8G8R8A8_UNORM:
1221 case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
1222 case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
1223 case vk::VK_FORMAT_R16G16B16A16_UNORM:
1224 case vk::VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1225 case vk::VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1226 case vk::VK_FORMAT_R8G8B8A8_UNORM:
1227 return 4;
1228
1229 case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1230 case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1231 case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1232 case vk::VK_FORMAT_B5G6R5_UNORM_PACK16:
1233 case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1234 case vk::VK_FORMAT_B8G8R8_UNORM:
1235 case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1236 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1237 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1238 case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
1239 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1240 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1241 case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
1242 case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1243 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1244 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1245 case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
1246 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1247 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1248 case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
1249 case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1250 case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1251 case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1252 case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
1253 case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1254 case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1255 case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
1256 case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1257 case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1258 case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1259 case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
1260 case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1261 case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1262 case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
1263 case vk::VK_FORMAT_R16G16B16_UNORM:
1264 case vk::VK_FORMAT_R5G6B5_UNORM_PACK16:
1265 case vk::VK_FORMAT_R8G8B8_UNORM:
1266 return 3;
1267
1268 case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
1269 case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
1270 return 2;
1271
1272 case vk::VK_FORMAT_R10X6_UNORM_PACK16:
1273 case vk::VK_FORMAT_R12X4_UNORM_PACK16:
1274 return 1;
1275
1276 default:
1277 DE_FATAL("Unknown number of channels");
1278 return -1;
1279 }
1280 }
1281
1282 // YCbCr color conversion utilities
1283 namespace
1284 {
1285
rangeExpandChroma(vk::VkSamplerYcbcrRange range,const tcu::FloatFormat & conversionFormat,const deUint32 bits,const tcu::Interval & sample)1286 tcu::Interval rangeExpandChroma (vk::VkSamplerYcbcrRange range,
1287 const tcu::FloatFormat& conversionFormat,
1288 const deUint32 bits,
1289 const tcu::Interval& sample)
1290 {
1291 const deUint32 values (0x1u << bits);
1292
1293 switch (range)
1294 {
1295 case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
1296 return conversionFormat.roundOut(sample - conversionFormat.roundOut(tcu::Interval((double)(0x1u << (bits - 1u)) / (double)((0x1u << bits) - 1u)), false), false);
1297
1298 case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
1299 {
1300 const tcu::Interval a (conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
1301 const tcu::Interval dividend (conversionFormat.roundOut(a - tcu::Interval((double)(128u * (0x1u << (bits - 8u)))), false));
1302 const tcu::Interval divisor ((double)(224u * (0x1u << (bits - 8u))));
1303 const tcu::Interval result (conversionFormat.roundOut(dividend / divisor, false));
1304
1305 return result;
1306 }
1307
1308 default:
1309 DE_FATAL("Unknown YCbCrRange");
1310 return tcu::Interval();
1311 }
1312 }
1313
rangeExpandLuma(vk::VkSamplerYcbcrRange range,const tcu::FloatFormat & conversionFormat,const deUint32 bits,const tcu::Interval & sample)1314 tcu::Interval rangeExpandLuma (vk::VkSamplerYcbcrRange range,
1315 const tcu::FloatFormat& conversionFormat,
1316 const deUint32 bits,
1317 const tcu::Interval& sample)
1318 {
1319 const deUint32 values (0x1u << bits);
1320
1321 switch (range)
1322 {
1323 case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
1324 return conversionFormat.roundOut(sample, false);
1325
1326 case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
1327 {
1328 const tcu::Interval a (conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
1329 const tcu::Interval dividend (conversionFormat.roundOut(a - tcu::Interval((double)(16u * (0x1u << (bits - 8u)))), false));
1330 const tcu::Interval divisor ((double)(219u * (0x1u << (bits - 8u))));
1331 const tcu::Interval result (conversionFormat.roundOut(dividend / divisor, false));
1332
1333 return result;
1334 }
1335
1336 default:
1337 DE_FATAL("Unknown YCbCrRange");
1338 return tcu::Interval();
1339 }
1340 }
1341
// Widens x so that it also contains min when x reaches below min, and max when x reaches
// above max. The original range is kept, so the result covers both the clamped and the
// unclamped value.
tcu::Interval clampMaybe (const tcu::Interval& x,
						  double min,
						  double max)
{
	DE_ASSERT(min <= max);

	const bool reachesBelowMin = x.lo() < min;
	const bool reachesAboveMax = x.hi() > max;
	tcu::Interval widened = x;

	if (reachesBelowMin)
		widened = widened | tcu::Interval(min);

	if (reachesAboveMax)
		widened = widened | tcu::Interval(max);

	return widened;
}
1358
convertColor(vk::VkSamplerYcbcrModelConversion colorModel,vk::VkSamplerYcbcrRange range,const vector<tcu::FloatFormat> & conversionFormat,const tcu::UVec4 & bitDepth,const tcu::Interval input[4],tcu::Interval output[4])1359 void convertColor (vk::VkSamplerYcbcrModelConversion colorModel,
1360 vk::VkSamplerYcbcrRange range,
1361 const vector<tcu::FloatFormat>& conversionFormat,
1362 const tcu::UVec4& bitDepth,
1363 const tcu::Interval input[4],
1364 tcu::Interval output[4])
1365 {
1366 switch (colorModel)
1367 {
1368 case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
1369 {
1370 for (size_t ndx = 0; ndx < 4; ndx++)
1371 output[ndx] = input[ndx];
1372 break;
1373 }
1374
1375 case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
1376 {
1377 output[0] = clampMaybe(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]), -0.5, 0.5);
1378 output[1] = clampMaybe(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]), 0.0, 1.0);
1379 output[2] = clampMaybe(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]), -0.5, 0.5);
1380 output[3] = input[3];
1381 break;
1382 }
1383
1384 case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
1385 case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
1386 case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
1387 {
1388 const tcu::Interval y (rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]));
1389 const tcu::Interval cr (rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]));
1390 const tcu::Interval cb (rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]));
1391
1392 const tcu::Interval yClamped (clampMaybe(y, 0.0, 1.0));
1393 const tcu::Interval crClamped (clampMaybe(cr, -0.5, 0.5));
1394 const tcu::Interval cbClamped (clampMaybe(cb, -0.5, 0.5));
1395
1396 if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601)
1397 {
1398 output[0] = conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.402 * crClamped, false), false);
1399 output[1] = conversionFormat[1].roundOut(conversionFormat[1].roundOut(yClamped - conversionFormat[1].roundOut((0.202008 / 0.587) * cbClamped, false), false) - conversionFormat[1].roundOut((0.419198 / 0.587) * crClamped, false), false);
1400 output[2] = conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.772 * cbClamped, false), false);
1401 }
1402 else if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709)
1403 {
1404 output[0] = conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.5748 * crClamped, false), false);
1405 output[1] = conversionFormat[1].roundOut(conversionFormat[1].roundOut(yClamped - conversionFormat[1].roundOut((0.13397432 / 0.7152) * cbClamped, false), false) - conversionFormat[1].roundOut((0.33480248 / 0.7152) * crClamped, false), false);
1406 output[2] = conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8556 * cbClamped, false), false);
1407 }
1408 else
1409 {
1410 output[0] = conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.4746 * crClamped, false), false);
1411 output[1] = conversionFormat[1].roundOut(conversionFormat[1].roundOut(yClamped - conversionFormat[1].roundOut(conversionFormat[1].roundOut(0.11156702 / 0.6780, false) * cbClamped, false), false) - conversionFormat[1].roundOut(conversionFormat[1].roundOut(0.38737742 / 0.6780, false) * crClamped, false), false);
1412 output[2] = conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8814 * cbClamped, false), false);
1413 }
1414 output[3] = input[3];
1415 break;
1416 }
1417
1418 default:
1419 DE_FATAL("Unknown YCbCrModel");
1420 }
1421
1422 if (colorModel != vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
1423 {
1424 for (int ndx = 0; ndx < 3; ndx++)
1425 output[ndx] = clampMaybe(output[ndx], 0.0, 1.0);
1426 }
1427 }
1428
// Reflects negative coordinates around the -0.5 boundary: -1 -> 0, -2 -> 1, ...
// Non-negative coordinates are returned unchanged.
int mirror (int coord)
{
	if (coord < 0)
		return -(coord + 1);

	return coord;
}
1433
// Remainder of a / b, shifted up by b when the built-in remainder is negative.
int imod (int a, int b)
{
	const int remainder = a % b;

	if (remainder < 0)
		return remainder + b;

	return remainder;
}
1439
// Fractional part of an interval. An interval at least 1.0 wide covers every fraction, so
// [0, 1] is returned; otherwise the fractional parts of both end points form the result.
// NOTE(review): the end points are mapped independently, so an input that contains an integer
// (e.g. its upper end is exactly integral) is not treated specially - confirm callers only
// pass ranges where this is acceptable.
tcu::Interval frac (const tcu::Interval& x)
{
	const double width = x.hi() - x.lo();

	if (width >= 1.0)
		return tcu::Interval(0.0, 1.0);

	const double fracLo = deFrac(x.lo());
	const double fracHi = deFrac(x.hi());

	return tcu::Interval(fracLo, fracHi);
}
1451
// Scales the normalized coordinate st by size, rounding outwards with coordFormat both
// before and after the multiplication.
tcu::Interval calculateUV (const tcu::FloatFormat& coordFormat,
						   const tcu::Interval& st,
						   const int size)
{
	const tcu::Interval roundedSt = coordFormat.roundOut(st, false);
	const tcu::Interval extent = tcu::Interval((double)size);

	return coordFormat.roundOut(roundedSt * extent, false);
}
1458
// Range of texel coordinates nearest filtering may select for uv: uv - 0.5 is rounded
// outwards, both ends are pushed out by one more ULP and then rounded to integers.
tcu::IVec2 calculateNearestIJRange (const tcu::FloatFormat& coordFormat,
									const tcu::Interval& uv)
{
	const tcu::Interval roundedUv = coordFormat.roundOut(uv, false);
	const tcu::Interval ij = coordFormat.roundOut(roundedUv - tcu::Interval(0.5), false);

	const double lowest = ij.lo() - coordFormat.ulp(ij.lo(), 1);
	const double highest = ij.hi() + coordFormat.ulp(ij.hi(), 1);

	return tcu::IVec2(deRoundToInt32(lowest), deRoundToInt32(highest));
}
1466
1467 // Calculate range of pixel coordinates that can be used as lower coordinate for linear sampling
calculateLinearIJRange(const tcu::FloatFormat & coordFormat,const tcu::Interval & uv)1468 tcu::IVec2 calculateLinearIJRange (const tcu::FloatFormat& coordFormat,
1469 const tcu::Interval& uv)
1470 {
1471 const tcu::Interval ij (coordFormat.roundOut(uv - tcu::Interval(0.5), false));
1472
1473 return tcu::IVec2(deFloorToInt32(ij.lo()), deFloorToInt32(ij.hi()));
1474 }
1475
calculateIJRange(vk::VkFilter filter,const tcu::FloatFormat & coordFormat,const tcu::Interval & uv)1476 tcu::IVec2 calculateIJRange (vk::VkFilter filter,
1477 const tcu::FloatFormat& coordFormat,
1478 const tcu::Interval& uv)
1479 {
1480 DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1481 return (filter == vk::VK_FILTER_LINEAR) ? calculateLinearIJRange(coordFormat, uv)
1482 : calculateNearestIJRange(coordFormat, uv);
1483 }
1484
// Interpolation weight range for the texel cell [ij, ij + 1]: the part of uv - 0.5 that lies
// inside the cell is reduced to its fractional part and snapped outwards to a grid of
// 2^subTexelPrecisionBits steps, limited to [0, 1].
tcu::Interval calculateAB (const deUint32 subTexelPrecisionBits,
						   const tcu::Interval& uv,
						   int ij)
{
	const deUint32 subdivisions = 0x1u << subTexelPrecisionBits;

	const tcu::Interval cell = tcu::Interval((double)ij, (double)(ij + 1));
	const tcu::Interval ab = frac((uv - 0.5) & cell);
	const tcu::Interval gridAB = ab * tcu::Interval(subdivisions);

	// Lower end snaps down, upper end snaps up.
	const double snappedLo = de::max(deFloor(gridAB.lo()) / subdivisions, 0.0);
	const double snappedHi = de::min(deCeil(gridAB.hi()) / subdivisions, 1.0);

	return tcu::Interval(snappedLo, snappedHi);
}
1496
// Fetches the channel value at coord after applying the address modes to both coordinates.
tcu::Interval lookupWrapped (const ChannelAccess& access,
							 const tcu::FloatFormat& conversionFormat,
							 vk::VkSamplerAddressMode addressModeU,
							 vk::VkSamplerAddressMode addressModeV,
							 const tcu::IVec2& coord)
{
	const int wrappedX = wrap(addressModeU, coord.x(), access.getSize().x());
	const int wrappedY = wrap(addressModeV, coord.y(), access.getSize().y());

	return access.getChannel(conversionFormat, tcu::IVec3(wrappedX, wrappedY, 0));
}
1506
// Bilinear interpolation of four texels. a weights the second column (p10, p11) and b the
// second row (p01, p11); the first column/row get 1 - a and 1 - b. Texels are accumulated in
// the order p00, p10, p01, p11 and every intermediate result is rounded outwards with
// filteringFormat.
tcu::Interval linearInterpolate (const tcu::FloatFormat& filteringFormat,
								 const tcu::Interval& a,
								 const tcu::Interval& b,
								 const tcu::Interval& p00,
								 const tcu::Interval& p10,
								 const tcu::Interval& p01,
								 const tcu::Interval& p11)
{
	const tcu::Interval* const texels[2][2] =
	{
		{ &p00, &p10 },
		{ &p01, &p11 }
	};
	tcu::Interval sum (0.0);

	for (int row = 0; row < 2; row++)
	{
		const tcu::Interval weightB (filteringFormat.roundOut(row == 0 ? (1.0 - b) : b, false));

		for (int col = 0; col < 2; col++)
		{
			const tcu::Interval weightA (filteringFormat.roundOut(col == 0 ? (1.0 - a) : a, false));
			const tcu::Interval weight (filteringFormat.roundOut(weightA * weightB, false));
			const tcu::Interval contribution (filteringFormat.roundOut(*texels[row][col] * weight, false));

			sum = filteringFormat.roundOut(sum + contribution, false);
		}
	}

	return sum;
}
1535
// Maps a luma-resolution coordinate to the half-resolution chroma plane:
// 0.5 * (uv + 0.5) for VK_CHROMA_LOCATION_COSITED_EVEN, 0.5 * uv otherwise.
// Every step is rounded outwards with coordFormat.
tcu::Interval calculateImplicitChromaUV (const tcu::FloatFormat& coordFormat,
										 vk::VkChromaLocation offset,
										 const tcu::Interval& uv)
{
	if (offset != vk::VK_CHROMA_LOCATION_COSITED_EVEN)
		return coordFormat.roundOut(0.5 * uv, false);

	const tcu::Interval shifted = coordFormat.roundOut(uv + 0.5, false);

	return coordFormat.roundOut(0.5 * shifted, false);
}
1545
// Fetches the 2x2 texel block whose top-left texel is coord (address modes applied per texel)
// and blends it with linearInterpolate() using the weights a and b.
tcu::Interval linearSample (const ChannelAccess& access,
							const tcu::FloatFormat& conversionFormat,
							const tcu::FloatFormat& filteringFormat,
							vk::VkSamplerAddressMode addressModeU,
							vk::VkSamplerAddressMode addressModeV,
							const tcu::IVec2& coord,
							const tcu::Interval& a,
							const tcu::Interval& b)
{
	const tcu::Interval texel00 = lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 0));
	const tcu::Interval texel10 = lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 0));
	const tcu::Interval texel01 = lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 1));
	const tcu::Interval texel11 = lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 1));

	return linearInterpolate(filteringFormat, a, b, texel00, texel10, texel01, texel11);
}
1561
// Reconstructs the chroma value for luma texel (i, j) of a horizontally subsampled plane by
// linear interpolation between the chroma texels subI and subI + 1 of row j.
//
// weightLeft is the weight of texel subI; texel subI + 1 receives 1 - weightLeft:
//  - VK_CHROMA_LOCATION_COSITED_EVEN: chroma texel k is co-sited with luma column 2k, so an
//    even column takes texel floor(i / 2) entirely and an odd column averages that texel with
//    the next one.
//  - otherwise (midpoint): chroma texel k lies between luma columns 2k and 2k + 1, giving
//    weights 0.25 / 0.75 for texels (k - 1, k) on even columns and 0.75 / 0.25 for texels
//    (k, k + 1) on odd columns.
//
// The cosited-even weights used to be 0.0 / 0.5, which made even columns return texel
// subI + 1 instead of the co-sited texel subI.
tcu::Interval reconstructLinearXChromaSample (const tcu::FloatFormat& filteringFormat,
											  const tcu::FloatFormat& conversionFormat,
											  vk::VkChromaLocation offset,
											  vk::VkSamplerAddressMode addressModeU,
											  vk::VkSamplerAddressMode addressModeV,
											  const ChannelAccess& access,
											  int i,
											  int j)
{
	const bool cosited = offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN;
	const bool evenColumn = (i % 2 == 0);

	const int subI = cosited
				   ? divFloor(i, 2)
				   : (evenColumn ? divFloor(i, 2) - 1 : divFloor(i, 2));
	const double weightLeft = cosited
							? (evenColumn ? 1.0 : 0.5)
							: (evenColumn ? 0.25 : 0.75);

	const tcu::Interval A (filteringFormat.roundOut(weightLeft * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI, j)), false));
	const tcu::Interval B (filteringFormat.roundOut((1.0 - weightLeft) * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI + 1, j)), false));

	return filteringFormat.roundOut(A + B, false);
}
1582
// Reconstructs the chroma value for luma texel (i, j) of a plane that is subsampled in both
// directions by bilinear interpolation of the 2x2 chroma block starting at (subI, subJ).
//
// a and b are handed to linearSample(), where they weight the second column / row of the
// block (subI + 1, subJ + 1); the first column / row receive 1 - a and 1 - b:
//  - VK_CHROMA_LOCATION_COSITED_EVEN: chroma texel k is co-sited with luma texel 2k, so the
//    weight of the next texel is 0.0 on even and 0.5 on odd luma coordinates.
//  - otherwise (midpoint): chroma texel k lies between luma texels 2k and 2k + 1, so the block
//    starts at k - 1 with weight 0.75 for texel k on even coordinates, and at k with weight
//    0.25 for texel k + 1 on odd coordinates.
//
// The midpoint weights used to be 0.25 / 0.75, i.e. swapped with respect to the convention
// of linearSample(); the cosited-even weights are unchanged.
tcu::Interval reconstructLinearXYChromaSample (const tcu::FloatFormat& filteringFormat,
											   const tcu::FloatFormat& conversionFormat,
											   vk::VkChromaLocation xOffset,
											   vk::VkChromaLocation yOffset,
											   vk::VkSamplerAddressMode addressModeU,
											   vk::VkSamplerAddressMode addressModeV,
											   const ChannelAccess& access,
											   int i,
											   int j)
{
	const bool xCosited = xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN;
	const bool yCosited = yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN;
	const bool evenColumn = (i % 2 == 0);
	const bool evenRow = (j % 2 == 0);

	const int subI = xCosited
				   ? divFloor(i, 2)
				   : (evenColumn ? divFloor(i, 2) - 1 : divFloor(i, 2));
	const int subJ = yCosited
				   ? divFloor(j, 2)
				   : (evenRow ? divFloor(j, 2) - 1 : divFloor(j, 2));

	// Weights of the texels at subI + 1 and subJ + 1 respectively.
	const double a = xCosited
				   ? (evenColumn ? 0.0 : 0.5)
				   : (evenColumn ? 0.75 : 0.25);
	const double b = yCosited
				   ? (evenRow ? 0.0 : 0.5)
				   : (evenRow ? 0.75 : 0.25);

	return linearSample(access, conversionFormat, filteringFormat, addressModeU, addressModeV, tcu::IVec2(subI, subJ), a, b);
}
1609
swizzle(vk::VkComponentSwizzle swizzle,const ChannelAccess & identityPlane,const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane)1610 const ChannelAccess& swizzle (vk::VkComponentSwizzle swizzle,
1611 const ChannelAccess& identityPlane,
1612 const ChannelAccess& rPlane,
1613 const ChannelAccess& gPlane,
1614 const ChannelAccess& bPlane,
1615 const ChannelAccess& aPlane)
1616 {
1617 switch (swizzle)
1618 {
1619 case vk::VK_COMPONENT_SWIZZLE_IDENTITY: return identityPlane;
1620 case vk::VK_COMPONENT_SWIZZLE_R: return rPlane;
1621 case vk::VK_COMPONENT_SWIZZLE_G: return gPlane;
1622 case vk::VK_COMPONENT_SWIZZLE_B: return bPlane;
1623 case vk::VK_COMPONENT_SWIZZLE_A: return aPlane;
1624
1625 default:
1626 DE_FATAL("Unsupported swizzle");
1627 return identityPlane;
1628 }
1629 }
1630
1631 } // anonymous
1632
wrap(vk::VkSamplerAddressMode addressMode,int coord,int size)1633 int wrap (vk::VkSamplerAddressMode addressMode,
1634 int coord,
1635 int size)
1636 {
1637 switch (addressMode)
1638 {
1639 case vk::VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
1640 return (size - 1) - mirror(imod(coord, 2 * size) - size);
1641
1642 case vk::VK_SAMPLER_ADDRESS_MODE_REPEAT:
1643 return imod(coord, size);
1644
1645 case vk::VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
1646 return de::clamp(coord, 0, size - 1);
1647
1648 case vk::VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
1649 return de::clamp(mirror(coord), 0, size - 1);
1650
1651 default:
1652 DE_FATAL("Unknown wrap mode");
1653 return ~0;
1654 }
1655 }
1656
/*--------------------------------------------------------------------*//*!
 * \brief Integer division rounding toward negative infinity.
 *
 * C++ integer division truncates toward zero, so the truncated quotient
 * is one too large whenever the division is inexact and the operands have
 * opposite signs. The remainder carries the sign of the dividend, which
 * makes that case detectable for both positive and negative divisors.
 *
 * \param a	Dividend.
 * \param b	Divisor, must be non-zero.
 * \return floor(a / b).
 *//*--------------------------------------------------------------------*/
int divFloor (int a, int b)
{
	const int quotient	= a / b;
	const int remainder	= a % b;

	// Inexact division with a negative exact result: step down by one.
	if (remainder != 0 && ((remainder < 0) != (b < 0)))
		return quotient - 1;

	return quotient;
}
1666
calculateBounds(const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane,const UVec4 & bitDepth,const vector<Vec2> & sts,const vector<FloatFormat> & filteringFormat,const vector<FloatFormat> & conversionFormat,const deUint32 subTexelPrecisionBits,vk::VkFilter filter,vk::VkSamplerYcbcrModelConversion colorModel,vk::VkSamplerYcbcrRange range,vk::VkFilter chromaFilter,vk::VkChromaLocation xChromaOffset,vk::VkChromaLocation yChromaOffset,const vk::VkComponentMapping & componentMapping,bool explicitReconstruction,vk::VkSamplerAddressMode addressModeU,vk::VkSamplerAddressMode addressModeV,std::vector<Vec4> & minBounds,std::vector<Vec4> & maxBounds,std::vector<Vec4> & uvBounds,std::vector<IVec4> & ijBounds)1667 void calculateBounds (const ChannelAccess& rPlane,
1668 const ChannelAccess& gPlane,
1669 const ChannelAccess& bPlane,
1670 const ChannelAccess& aPlane,
1671 const UVec4& bitDepth,
1672 const vector<Vec2>& sts,
1673 const vector<FloatFormat>& filteringFormat,
1674 const vector<FloatFormat>& conversionFormat,
1675 const deUint32 subTexelPrecisionBits,
1676 vk::VkFilter filter,
1677 vk::VkSamplerYcbcrModelConversion colorModel,
1678 vk::VkSamplerYcbcrRange range,
1679 vk::VkFilter chromaFilter,
1680 vk::VkChromaLocation xChromaOffset,
1681 vk::VkChromaLocation yChromaOffset,
1682 const vk::VkComponentMapping& componentMapping,
1683 bool explicitReconstruction,
1684 vk::VkSamplerAddressMode addressModeU,
1685 vk::VkSamplerAddressMode addressModeV,
1686 std::vector<Vec4>& minBounds,
1687 std::vector<Vec4>& maxBounds,
1688 std::vector<Vec4>& uvBounds,
1689 std::vector<IVec4>& ijBounds)
1690 {
1691 const FloatFormat highp (-126, 127, 23, true,
1692 tcu::MAYBE, // subnormals
1693 tcu::YES, // infinities
1694 tcu::MAYBE); // NaN
1695 const FloatFormat coordFormat (-32, 32, 16, true);
1696 const ChannelAccess& rAccess (swizzle(componentMapping.r, rPlane, rPlane, gPlane, bPlane, aPlane));
1697 const ChannelAccess& gAccess (swizzle(componentMapping.g, gPlane, rPlane, gPlane, bPlane, aPlane));
1698 const ChannelAccess& bAccess (swizzle(componentMapping.b, bPlane, rPlane, gPlane, bPlane, aPlane));
1699 const ChannelAccess& aAccess (swizzle(componentMapping.a, aPlane, rPlane, gPlane, bPlane, aPlane));
1700
1701 const bool subsampledX = gAccess.getSize().x() > rAccess.getSize().x();
1702 const bool subsampledY = gAccess.getSize().y() > rAccess.getSize().y();
1703
1704 minBounds.resize(sts.size(), Vec4(TCU_INFINITY));
1705 maxBounds.resize(sts.size(), Vec4(-TCU_INFINITY));
1706
1707 uvBounds.resize(sts.size(), Vec4(TCU_INFINITY, -TCU_INFINITY, TCU_INFINITY, -TCU_INFINITY));
1708 ijBounds.resize(sts.size(), IVec4(0x7FFFFFFF, -1 -0x7FFFFFFF, 0x7FFFFFFF, -1 -0x7FFFFFFF));
1709
1710 // Chroma plane sizes must match
1711 DE_ASSERT(rAccess.getSize() == bAccess.getSize());
1712
1713 // Luma plane sizes must match
1714 DE_ASSERT(gAccess.getSize() == aAccess.getSize());
1715
1716 // Luma plane size must match chroma plane or be twice as big
1717 DE_ASSERT(rAccess.getSize().x() == gAccess.getSize().x() || 2 * rAccess.getSize().x() == gAccess.getSize().x());
1718 DE_ASSERT(rAccess.getSize().y() == gAccess.getSize().y() || 2 * rAccess.getSize().y() == gAccess.getSize().y());
1719
1720 DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1721 DE_ASSERT(chromaFilter == vk::VK_FILTER_NEAREST || chromaFilter == vk::VK_FILTER_LINEAR);
1722 DE_ASSERT(subsampledX || !subsampledY);
1723
1724
1725 for (size_t ndx = 0; ndx < sts.size(); ndx++)
1726 {
1727 const Vec2 st (sts[ndx]);
1728 Interval bounds[4];
1729
1730 const Interval u (calculateUV(coordFormat, st[0], gAccess.getSize().x()));
1731 const Interval v (calculateUV(coordFormat, st[1], gAccess.getSize().y()));
1732
1733 uvBounds[ndx][0] = (float)u.lo();
1734 uvBounds[ndx][1] = (float)u.hi();
1735
1736 uvBounds[ndx][2] = (float)v.lo();
1737 uvBounds[ndx][3] = (float)v.hi();
1738
1739 const IVec2 iRange (calculateIJRange(filter, coordFormat, u));
1740 const IVec2 jRange (calculateIJRange(filter, coordFormat, v));
1741
1742 ijBounds[ndx][0] = iRange[0];
1743 ijBounds[ndx][1] = iRange[1];
1744
1745 ijBounds[ndx][2] = jRange[0];
1746 ijBounds[ndx][3] = jRange[1];
1747
1748 for (int j = jRange.x(); j <= jRange.y(); j++)
1749 for (int i = iRange.x(); i <= iRange.y(); i++)
1750 {
1751 if (filter == vk::VK_FILTER_NEAREST)
1752 {
1753 const Interval gValue (lookupWrapped(gAccess, conversionFormat[1], addressModeU, addressModeV, IVec2(i, j)));
1754 const Interval aValue (lookupWrapped(aAccess, conversionFormat[3], addressModeU, addressModeV, IVec2(i, j)));
1755
1756 if (explicitReconstruction || !(subsampledX || subsampledY))
1757 {
1758 Interval rValue, bValue;
1759 if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1760 {
1761 // Reconstruct using nearest if needed, otherwise, just take what's already there.
1762 const int subI = subsampledX ? i / 2 : i;
1763 const int subJ = subsampledY ? j / 2 : j;
1764 rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(subI, subJ));
1765 bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(subI, subJ));
1766 }
1767 else // vk::VK_FILTER_LINEAR
1768 {
1769 if (subsampledY)
1770 {
1771 rValue = reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i, j);
1772 bValue = reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i, j);
1773 }
1774 else
1775 {
1776 rValue = reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i, j);
1777 bValue = reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i, j);
1778 }
1779 }
1780
1781 const Interval srcColor[] =
1782 {
1783 rValue,
1784 gValue,
1785 bValue,
1786 aValue
1787 };
1788 Interval dstColor[4];
1789
1790 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1791
1792 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1793 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1794 }
1795 else
1796 {
1797 const Interval chromaU (subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) : u);
1798 const Interval chromaV (subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) : v);
1799
1800 // Reconstructed chroma samples with implicit filtering
1801 const IVec2 chromaIRange (subsampledX ? calculateIJRange(chromaFilter, coordFormat, chromaU) : IVec2(i, i));
1802 const IVec2 chromaJRange (subsampledY ? calculateIJRange(chromaFilter, coordFormat, chromaV) : IVec2(j, j));
1803
1804 for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1805 for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1806 {
1807 Interval rValue, bValue;
1808
1809 if (chromaFilter == vk::VK_FILTER_NEAREST)
1810 {
1811 rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(chromaI, chromaJ));
1812 bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(chromaI, chromaJ));
1813 }
1814 else // vk::VK_FILTER_LINEAR
1815 {
1816 const Interval chromaA (calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1817 const Interval chromaB (calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1818
1819 rValue = linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU, addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1820 bValue = linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU, addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1821 }
1822
1823 const Interval srcColor[] =
1824 {
1825 rValue,
1826 gValue,
1827 bValue,
1828 aValue
1829 };
1830
1831 Interval dstColor[4];
1832 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1833
1834 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1835 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1836 }
1837 }
1838 }
1839 else // filter == vk::VK_FILTER_LINEAR
1840 {
1841 const Interval lumaA (calculateAB(subTexelPrecisionBits, u, i));
1842 const Interval lumaB (calculateAB(subTexelPrecisionBits, v, j));
1843
1844 const Interval gValue (linearSample(gAccess, conversionFormat[1], filteringFormat[1], addressModeU, addressModeV, IVec2(i, j), lumaA, lumaB));
1845 const Interval aValue (linearSample(aAccess, conversionFormat[3], filteringFormat[3], addressModeU, addressModeV, IVec2(i, j), lumaA, lumaB));
1846
1847 if (explicitReconstruction || !(subsampledX || subsampledY))
1848 {
1849 Interval rValue, bValue;
1850 if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1851 {
1852 rValue = linearInterpolate(filteringFormat[0], lumaA, lumaB,
1853 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1854 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1855 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1856 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1857 bValue = linearInterpolate(filteringFormat[2], lumaA, lumaB,
1858 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1859 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1860 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1861 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1862 }
1863 else // vk::VK_FILTER_LINEAR
1864 {
1865 if (subsampledY)
1866 {
1867 // Linear, Reconstructed xx chroma samples with explicit linear filtering
1868 rValue = linearInterpolate(filteringFormat[0], lumaA, lumaB,
1869 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i, j),
1870 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i + 1, j),
1871 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i , j + 1),
1872 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i + 1, j + 1));
1873 bValue = linearInterpolate(filteringFormat[2], lumaA, lumaB,
1874 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i, j),
1875 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i + 1, j),
1876 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i , j + 1),
1877 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i + 1, j + 1));
1878 }
1879 else
1880 {
1881 // Linear, Reconstructed x chroma samples with explicit linear filtering
1882 rValue = linearInterpolate(filteringFormat[0], lumaA, lumaB,
1883 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i, j),
1884 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i + 1, j),
1885 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i , j + 1),
1886 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i + 1, j + 1));
1887 bValue = linearInterpolate(filteringFormat[2], lumaA, lumaB,
1888 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i, j),
1889 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i + 1, j),
1890 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i , j + 1),
1891 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i + 1, j + 1));
1892 }
1893 }
1894
1895 const Interval srcColor[] =
1896 {
1897 rValue,
1898 gValue,
1899 bValue,
1900 aValue
1901 };
1902 Interval dstColor[4];
1903
1904 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1905
1906 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1907 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1908 }
1909 else
1910 {
1911 const Interval chromaU (subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) : u);
1912 const Interval chromaV (subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) : v);
1913
1914 // TODO: It looks incorrect to ignore the chroma filter here. Is it?
1915 const IVec2 chromaIRange (calculateNearestIJRange(coordFormat, chromaU));
1916 const IVec2 chromaJRange (calculateNearestIJRange(coordFormat, chromaV));
1917
1918 for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1919 for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1920 {
1921 Interval rValue, bValue;
1922
1923 if (chromaFilter == vk::VK_FILTER_NEAREST)
1924 {
1925 rValue = lookupWrapped(rAccess, conversionFormat[1], addressModeU, addressModeV, IVec2(chromaI, chromaJ));
1926 bValue = lookupWrapped(bAccess, conversionFormat[3], addressModeU, addressModeV, IVec2(chromaI, chromaJ));
1927 }
1928 else // vk::VK_FILTER_LINEAR
1929 {
1930 const Interval chromaA (calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1931 const Interval chromaB (calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1932
1933 rValue = linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU, addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1934 bValue = linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU, addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1935 }
1936
1937 const Interval srcColor[] =
1938 {
1939 rValue,
1940 gValue,
1941 bValue,
1942 aValue
1943 };
1944 Interval dstColor[4];
1945 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1946
1947 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1948 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1949 }
1950 }
1951 }
1952 }
1953
1954 minBounds[ndx] = Vec4((float)bounds[0].lo(), (float)bounds[1].lo(), (float)bounds[2].lo(), (float)bounds[3].lo());
1955 maxBounds[ndx] = Vec4((float)bounds[0].hi(), (float)bounds[1].hi(), (float)bounds[2].hi(), (float)bounds[3].hi());
1956 }
1957 }
1958
1959 } // ycbcr
1960
1961 } // vkt
1962