• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 Google Inc.
6  * Copyright (c) 2019 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief YCbCr Test Utilities
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktYCbCrUtil.hpp"
26 
27 #include "vkQueryUtil.hpp"
28 #include "vkRefUtil.hpp"
29 #include "vkTypeUtil.hpp"
30 #include "vkCmdUtil.hpp"
31 
32 #include "tcuTextureUtil.hpp"
33 #include "deMath.h"
34 #include "tcuFloat.hpp"
35 #include "tcuVector.hpp"
36 #include "tcuVectorUtil.hpp"
37 
38 #include "deSTLUtil.hpp"
39 #include "deUniquePtr.hpp"
40 
41 #include <limits>
42 
43 namespace vkt
44 {
45 namespace ycbcr
46 {
47 
48 using namespace vk;
49 
50 using de::MovePtr;
51 using std::string;
52 using std::vector;
53 using tcu::FloatFormat;
54 using tcu::Interval;
55 using tcu::IVec2;
56 using tcu::IVec4;
57 using tcu::UVec2;
58 using tcu::UVec4;
59 using tcu::Vec2;
60 using tcu::Vec4;
61 
62 // MultiPlaneImageData
63 
MultiPlaneImageData(VkFormat format,const UVec2 & size)64 MultiPlaneImageData::MultiPlaneImageData(VkFormat format, const UVec2 &size)
65     : m_format(format)
66     , m_description(getPlanarFormatDescription(format))
67     , m_size(size)
68 {
69     for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
70         m_planeData[planeNdx].resize(
71             getPlaneSizeInBytes(m_description, size, planeNdx, 0, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY));
72 }
73 
// Copy constructor: duplicates format, description, size and the contents of
// every plane that the description declares (indices below numPlanes).
MultiPlaneImageData::MultiPlaneImageData(const MultiPlaneImageData &other)
    : m_format(other.m_format)
    , m_description(other.m_description)
    , m_size(other.m_size)
{
    uint32_t plane = 0;

    while (plane < m_description.numPlanes)
    {
        m_planeData[plane] = other.m_planeData[plane];
        ++plane;
    }
}
82 
~MultiPlaneImageData(void)83 MultiPlaneImageData::~MultiPlaneImageData(void)
84 {
85 }
86 
getChannelAccess(uint32_t channelNdx)87 tcu::PixelBufferAccess MultiPlaneImageData::getChannelAccess(uint32_t channelNdx)
88 {
89     void *planePtrs[PlanarFormatDescription::MAX_PLANES];
90     uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];
91 
92     for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
93     {
94         const uint32_t planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
95         planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
96         planePtrs[planeNdx]       = &m_planeData[planeNdx][0];
97     }
98 
99     return vk::getChannelAccess(m_description, m_size, planeRowPitches, planePtrs, channelNdx);
100 }
101 
getChannelAccess(uint32_t channelNdx) const102 tcu::ConstPixelBufferAccess MultiPlaneImageData::getChannelAccess(uint32_t channelNdx) const
103 {
104     const void *planePtrs[PlanarFormatDescription::MAX_PLANES];
105     uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];
106 
107     for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
108     {
109         const uint32_t planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
110         planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
111         planePtrs[planeNdx]       = &m_planeData[planeNdx][0];
112     }
113 
114     return vk::getChannelAccess(m_description, m_size, planeRowPitches, planePtrs, channelNdx);
115 }
116 
117 // Misc utilities
118 
119 namespace
120 {
121 
allocateStagingBuffers(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,const MultiPlaneImageData & imageData,vector<VkBufferSp> * buffers,vector<AllocationSp> * allocations)122 void allocateStagingBuffers(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
123                             const MultiPlaneImageData &imageData, vector<VkBufferSp> *buffers,
124                             vector<AllocationSp> *allocations)
125 {
126     for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
127     {
128         const VkBufferCreateInfo bufferInfo = {
129             VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
130             nullptr,
131             (VkBufferCreateFlags)0u,
132             (VkDeviceSize)imageData.getPlaneSize(planeNdx),
133             VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
134             VK_SHARING_MODE_EXCLUSIVE,
135             0u,
136             nullptr,
137         };
138         Move<VkBuffer> buffer(createBuffer(vkd, device, &bufferInfo));
139         MovePtr<Allocation> allocation(allocator.allocate(getBufferMemoryRequirements(vkd, device, *buffer),
140                                                           MemoryRequirement::HostVisible | MemoryRequirement::Any));
141 
142         VK_CHECK(vkd.bindBufferMemory(device, *buffer, allocation->getMemory(), allocation->getOffset()));
143 
144         buffers->push_back(VkBufferSp(new Unique<VkBuffer>(buffer)));
145         allocations->push_back(AllocationSp(allocation.release()));
146     }
147 }
148 
allocateAndWriteStagingBuffers(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,const MultiPlaneImageData & imageData,vector<VkBufferSp> * buffers,vector<AllocationSp> * allocations)149 void allocateAndWriteStagingBuffers(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
150                                     const MultiPlaneImageData &imageData, vector<VkBufferSp> *buffers,
151                                     vector<AllocationSp> *allocations)
152 {
153     allocateStagingBuffers(vkd, device, allocator, imageData, buffers, allocations);
154 
155     for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
156     {
157         deMemcpy((*allocations)[planeNdx]->getHostPtr(), imageData.getPlanePtr(planeNdx),
158                  imageData.getPlaneSize(planeNdx));
159         flushMappedMemoryRange(vkd, device, (*allocations)[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
160     }
161 }
162 
readStagingBuffers(MultiPlaneImageData * imageData,const DeviceInterface & vkd,VkDevice device,const vector<AllocationSp> & allocations)163 void readStagingBuffers(MultiPlaneImageData *imageData, const DeviceInterface &vkd, VkDevice device,
164                         const vector<AllocationSp> &allocations)
165 {
166     for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
167     {
168         invalidateMappedMemoryRange(vkd, device, allocations[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
169         deMemcpy(imageData->getPlanePtr(planeNdx), allocations[planeNdx]->getHostPtr(),
170                  imageData->getPlaneSize(planeNdx));
171     }
172 }
173 
174 } // namespace
175 
checkImageSupport(Context & context,VkFormat format,VkImageCreateFlags createFlags,VkImageTiling tiling)176 void checkImageSupport(Context &context, VkFormat format, VkImageCreateFlags createFlags, VkImageTiling tiling)
177 {
178     const bool disjoint                                           = (createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0;
179     const VkPhysicalDeviceSamplerYcbcrConversionFeatures features = context.getSamplerYcbcrConversionFeatures();
180 
181     if (features.samplerYcbcrConversion == VK_FALSE)
182         TCU_THROW(NotSupportedError, "samplerYcbcrConversion is not supported");
183 
184     if (disjoint)
185     {
186         context.requireDeviceFunctionality("VK_KHR_bind_memory2");
187         context.requireDeviceFunctionality("VK_KHR_get_memory_requirements2");
188     }
189 
190     {
191         const VkFormatProperties formatProperties =
192             getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);
193         const VkFormatFeatureFlags featureFlags = tiling == VK_IMAGE_TILING_OPTIMAL ?
194                                                       formatProperties.optimalTilingFeatures :
195                                                       formatProperties.linearTilingFeatures;
196 
197         if ((featureFlags &
198              (VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)) == 0)
199             TCU_THROW(NotSupportedError, "YCbCr conversion is not supported for format");
200 
201         if (disjoint && ((featureFlags & VK_FORMAT_FEATURE_DISJOINT_BIT) == 0))
202             TCU_THROW(NotSupportedError, "Disjoint planes are not supported for format");
203     }
204 }
205 
extractI420Frame(std::vector<uint8_t> & videoDataPtr,uint32_t frameNumber,uint32_t width,uint32_t height,vkt::ycbcr::MultiPlaneImageData * imageData,bool half_size)206 void extractI420Frame(std::vector<uint8_t> &videoDataPtr, uint32_t frameNumber, uint32_t width, uint32_t height,
207                       vkt::ycbcr::MultiPlaneImageData *imageData, bool half_size)
208 {
209     uint32_t uOffset   = width * height;
210     uint32_t vOffset   = uOffset + (uOffset / 4);
211     uint32_t frameSize = uOffset + (uOffset / 2);
212 
213     // Ensure the videoDataPtr is large enough for the requested frame
214     if (videoDataPtr.size() < (frameNumber + 1) * frameSize)
215     {
216         TCU_THROW(NotSupportedError, "Video data pointer content is too small for requested frame");
217     }
218 
219     const uint8_t *yPlane = videoDataPtr.data() + frameNumber * frameSize;
220     const uint8_t *uPlane = videoDataPtr.data() + frameNumber * frameSize + uOffset;
221     const uint8_t *vPlane = videoDataPtr.data() + frameNumber * frameSize + vOffset;
222 
223     uint8_t *yPlaneData  = static_cast<uint8_t *>(imageData->getPlanePtr(0));
224     uint8_t *uvPlaneData = static_cast<uint8_t *>(imageData->getPlanePtr(1));
225 
226     // If half_size is true, perform a simple 2x reduction
227     if (half_size)
228     {
229         for (uint32_t j = 0; j < height; j += 2)
230         {
231             for (uint32_t i = 0; i < width; i += 2)
232             {
233                 yPlaneData[(j / 2) * (width / 2) + (i / 2)] = yPlane[j * width + i];
234             }
235         }
236         for (uint32_t j = 0; j < height / 2; j += 2)
237         {
238             for (uint32_t i = 0; i < width / 2; i += 2)
239             {
240                 uint32_t reducedIndex = (j / 2) * (width / 4) + (i / 2);
241                 uint32_t fullIndex    = j * (width / 2) + i;
242 
243                 uvPlaneData[2 * reducedIndex]     = uPlane[fullIndex];
244                 uvPlaneData[2 * reducedIndex + 1] = vPlane[fullIndex];
245             }
246         }
247     }
248     else
249     {
250         // Writing NV12 frame
251         uint32_t yPlaneSize = width * height;
252         deMemcpy(yPlaneData, yPlane, yPlaneSize);
253 
254         uint32_t uvPlaneSize = yPlaneSize / 2;
255         for (uint32_t i = 0; i < uvPlaneSize; i += 2)
256         {
257             uvPlaneData[i]     = uPlane[i / 2];
258             uvPlaneData[i + 1] = vPlane[i / 2];
259         }
260     }
261 }
262 
fillRandomNoNaN(de::Random * randomGen,uint8_t * const data,uint32_t size,const vk::VkFormat format)263 void fillRandomNoNaN(de::Random *randomGen, uint8_t *const data, uint32_t size, const vk::VkFormat format)
264 {
265     bool isFloat    = false;
266     uint32_t stride = 1;
267 
268     switch (format)
269     {
270     case vk::VK_FORMAT_B10G11R11_UFLOAT_PACK32:
271         isFloat = true;
272         stride  = 1;
273         break;
274     case vk::VK_FORMAT_R16_SFLOAT:
275     case vk::VK_FORMAT_R16G16_SFLOAT:
276     case vk::VK_FORMAT_R16G16B16_SFLOAT:
277     case vk::VK_FORMAT_R16G16B16A16_SFLOAT:
278         isFloat = true;
279         stride  = 2;
280         break;
281     case vk::VK_FORMAT_R32_SFLOAT:
282     case vk::VK_FORMAT_R32G32_SFLOAT:
283     case vk::VK_FORMAT_R32G32B32_SFLOAT:
284     case vk::VK_FORMAT_R32G32B32A32_SFLOAT:
285         isFloat = true;
286         stride  = 4;
287         break;
288     case vk::VK_FORMAT_R64_SFLOAT:
289     case vk::VK_FORMAT_R64G64_SFLOAT:
290     case vk::VK_FORMAT_R64G64B64_SFLOAT:
291     case vk::VK_FORMAT_R64G64B64A64_SFLOAT:
292         isFloat = true;
293         stride  = 8;
294         break;
295     default:
296         stride = 1;
297         break;
298     }
299 
300     if (isFloat)
301     {
302         uint32_t ndx = 0;
303         for (; ndx < size - stride + 1; ndx += stride)
304         {
305             if (stride == 1)
306             {
307                 // Set first bit of each channel to 0 to avoid NaNs, only format is B10G11R11
308                 const uint8_t mask[] = {0x7F, 0xDF, 0xFB, 0xFF};
309                 // Apply mask for both endians
310                 data[ndx] = (randomGen->getUint8() & mask[ndx % 4]) & mask[3 - ndx % 4];
311             }
312             else if (stride == 2)
313             {
314                 tcu::float16_t *const ptr = reinterpret_cast<tcu::float16_t *>(&data[ndx]);
315                 *ptr                      = tcu::Float16(randomGen->getFloat()).bits();
316             }
317             else if (stride == 4)
318             {
319                 float *ptr = reinterpret_cast<float *>(&data[ndx]);
320                 *ptr       = randomGen->getFloat();
321             }
322             else if (stride == 8)
323             {
324                 double *ptr = reinterpret_cast<double *>(&data[ndx]);
325                 *ptr        = randomGen->getDouble();
326             }
327         }
328         while (ndx < size)
329         {
330             data[ndx] = 0;
331         }
332     }
333     else
334     {
335         for (uint32_t ndx = 0; ndx < size; ++ndx)
336         {
337             data[ndx] = randomGen->getUint8();
338         }
339     }
340 }
341 
342 // When noNan is true, fillRandom does not generate NaNs in float formats.
fillRandom(de::Random * randomGen,MultiPlaneImageData * imageData,const vk::VkFormat format,const bool noNan)343 void fillRandom(de::Random *randomGen, MultiPlaneImageData *imageData, const vk::VkFormat format, const bool noNan)
344 {
345     for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
346     {
347         const size_t planeSize  = imageData->getPlaneSize(planeNdx);
348         uint8_t *const planePtr = (uint8_t *)imageData->getPlanePtr(planeNdx);
349 
350         if (noNan)
351         {
352             fillRandomNoNaN(randomGen, planePtr, (uint32_t)planeSize, format);
353         }
354         else
355         {
356             for (size_t ndx = 0; ndx < planeSize; ++ndx)
357             {
358                 planePtr[ndx] = randomGen->getUint8();
359             }
360         }
361     }
362 }
363 
fillGradient(MultiPlaneImageData * imageData,const tcu::Vec4 & minVal,const tcu::Vec4 & maxVal)364 void fillGradient(MultiPlaneImageData *imageData, const tcu::Vec4 &minVal, const tcu::Vec4 &maxVal)
365 {
366     const PlanarFormatDescription &formatInfo = imageData->getDescription();
367 
368     // \todo [pyry] Optimize: no point in re-rendering source gradient for each channel.
369 
370     for (uint32_t channelNdx = 0; channelNdx < 4; channelNdx++)
371     {
372         if (formatInfo.hasChannelNdx(channelNdx))
373         {
374             const tcu::PixelBufferAccess channelAccess = imageData->getChannelAccess(channelNdx);
375             tcu::TextureLevel tmpTexture(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT),
376                                          channelAccess.getWidth(), channelAccess.getHeight());
377             const tcu::ConstPixelBufferAccess tmpAccess = tmpTexture.getAccess();
378 
379             tcu::fillWithComponentGradients(tmpTexture, minVal, maxVal);
380 
381             for (int y = 0; y < channelAccess.getHeight(); ++y)
382                 for (int x = 0; x < channelAccess.getWidth(); ++x)
383                 {
384                     channelAccess.setPixel(tcu::Vec4(tmpAccess.getPixel(x, y)[channelNdx]), x, y);
385                 }
386         }
387     }
388 }
389 
fillZero(MultiPlaneImageData * imageData)390 void fillZero(MultiPlaneImageData *imageData)
391 {
392     for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
393         deMemset(imageData->getPlanePtr(planeNdx), 0, imageData->getPlaneSize(planeNdx));
394 }
395 
allocateAndBindImageMemory(const DeviceInterface & vkd,VkDevice device,Allocator & allocator,VkImage image,VkFormat format,VkImageCreateFlags createFlags,vk::MemoryRequirement requirement)396 vector<AllocationSp> allocateAndBindImageMemory(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
397                                                 VkImage image, VkFormat format, VkImageCreateFlags createFlags,
398                                                 vk::MemoryRequirement requirement)
399 {
400     vector<AllocationSp> allocations;
401 
402     if ((createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0)
403     {
404         const uint32_t numPlanes = getPlaneCount(format);
405 
406         bindImagePlanesMemory(vkd, device, image, numPlanes, allocations, allocator, requirement);
407     }
408     else
409     {
410         const VkMemoryRequirements reqs = getImageMemoryRequirements(vkd, device, image);
411 
412         allocations.push_back(AllocationSp(allocator.allocate(reqs, requirement).release()));
413 
414         VK_CHECK(vkd.bindImageMemory(device, image, allocations.back()->getMemory(), allocations.back()->getOffset()));
415     }
416 
417     return allocations;
418 }
419 // Accept only NV12
uploadImage(const DeviceInterface & vkd,VkDevice device,uint32_t queueFamilyNdx,Allocator & allocator,VkImage image,const MultiPlaneImageData & imageData,VkAccessFlags nextAccess,VkImageLayout finalLayout,uint32_t arrayLayer)420 void uploadImage(const DeviceInterface &vkd, VkDevice device, uint32_t queueFamilyNdx, Allocator &allocator,
421                  VkImage image, const MultiPlaneImageData &imageData, VkAccessFlags nextAccess,
422                  VkImageLayout finalLayout, uint32_t arrayLayer)
423 {
424     const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
425     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
426     const Unique<VkCommandBuffer> cmdBuffer(
427         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
428     vector<VkBufferSp> stagingBuffers;
429     vector<AllocationSp> stagingMemory;
430 
431     const PlanarFormatDescription &formatDesc = imageData.getDescription();
432 
433     allocateAndWriteStagingBuffers(vkd, device, allocator, imageData, &stagingBuffers, &stagingMemory);
434 
435     beginCommandBuffer(vkd, *cmdBuffer);
436 
437     for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
438     {
439         const VkImageAspectFlagBits aspect =
440             (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
441         const VkExtent3D imageExtent = makeExtent3D(imageData.getSize().x(), imageData.getSize().y(), 1u);
442         const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
443         const VkBufferImageCopy copy = {0u, // bufferOffset
444                                         0u, // bufferRowLength
445                                         0u, // bufferImageHeight
446                                         {(VkImageAspectFlags)aspect, 0u, arrayLayer, 1u},
447                                         makeOffset3D(0u, 0u, 0u),
448                                         planeExtent};
449 
450         {
451             const VkImageMemoryBarrier preCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
452                                                          nullptr,
453                                                          (VkAccessFlags)0,
454                                                          VK_ACCESS_TRANSFER_WRITE_BIT,
455                                                          VK_IMAGE_LAYOUT_UNDEFINED,
456                                                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
457                                                          VK_QUEUE_FAMILY_IGNORED,
458                                                          VK_QUEUE_FAMILY_IGNORED,
459                                                          image,
460                                                          {(VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u}};
461 
462             vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
463                                    (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u,
464                                    nullptr, 0u, nullptr, 1u, &preCopyBarrier);
465         }
466 
467         vkd.cmdCopyBufferToImage(*cmdBuffer, **stagingBuffers[planeNdx], image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
468                                  1u, &copy);
469 
470         {
471             const VkImageMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
472                                                           nullptr,
473                                                           VK_ACCESS_TRANSFER_WRITE_BIT,
474                                                           nextAccess,
475                                                           VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
476                                                           finalLayout,
477                                                           VK_QUEUE_FAMILY_IGNORED,
478                                                           VK_QUEUE_FAMILY_IGNORED,
479                                                           image,
480                                                           {(VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u}};
481 
482             vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
483                                    (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, (VkDependencyFlags)0u, 0u,
484                                    nullptr, 0u, nullptr, 1u, &postCopyBarrier);
485         }
486     }
487 
488     endCommandBuffer(vkd, *cmdBuffer);
489 
490     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
491 }
492 
fillImageMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,uint32_t queueFamilyNdx,vk::VkImage image,const std::vector<de::SharedPtr<vk::Allocation>> & allocations,const MultiPlaneImageData & imageData,vk::VkAccessFlags nextAccess,vk::VkImageLayout finalLayout,uint32_t arrayLayer)493 void fillImageMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t queueFamilyNdx, vk::VkImage image,
494                      const std::vector<de::SharedPtr<vk::Allocation>> &allocations,
495                      const MultiPlaneImageData &imageData, vk::VkAccessFlags nextAccess, vk::VkImageLayout finalLayout,
496                      uint32_t arrayLayer)
497 {
498     const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
499     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
500     const Unique<VkCommandBuffer> cmdBuffer(
501         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
502     const PlanarFormatDescription &formatDesc = imageData.getDescription();
503 
504     for (uint32_t planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
505     {
506         const VkImageAspectFlagBits aspect =
507             (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
508         const de::SharedPtr<Allocation> &allocation = allocations.size() > 1 ? allocations[planeNdx] : allocations[0];
509         const size_t planeSize                      = imageData.getPlaneSize(planeNdx);
510         const uint32_t planeH                = imageData.getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
511         const VkImageSubresource subresource = {
512             static_cast<vk::VkImageAspectFlags>(aspect),
513             0u,
514             arrayLayer,
515         };
516         VkSubresourceLayout layout;
517 
518         vkd.getImageSubresourceLayout(device, image, &subresource, &layout);
519 
520         for (uint32_t row = 0; row < planeH; ++row)
521         {
522             const size_t rowSize     = planeSize / planeH;
523             void *const dstPtr       = ((uint8_t *)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
524             const void *const srcPtr = ((const uint8_t *)imageData.getPlanePtr(planeNdx)) + row * rowSize;
525 
526             deMemcpy(dstPtr, srcPtr, rowSize);
527         }
528         flushMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
529     }
530 
531     beginCommandBuffer(vkd, *cmdBuffer);
532 
533     {
534         const VkImageMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
535                                                       nullptr,
536                                                       0u,
537                                                       nextAccess,
538                                                       VK_IMAGE_LAYOUT_PREINITIALIZED,
539                                                       finalLayout,
540                                                       VK_QUEUE_FAMILY_IGNORED,
541                                                       VK_QUEUE_FAMILY_IGNORED,
542                                                       image,
543                                                       {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, arrayLayer, 1u}};
544 
545         vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
546                                (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, (VkDependencyFlags)0u, 0u,
547                                nullptr, 0u, nullptr, 1u, &postCopyBarrier);
548     }
549 
550     endCommandBuffer(vkd, *cmdBuffer);
551 
552     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
553 }
554 
downloadImage(const DeviceInterface & vkd,VkDevice device,uint32_t queueFamilyNdx,Allocator & allocator,VkImage image,MultiPlaneImageData * imageData,VkAccessFlags prevAccess,VkImageLayout initialLayout,uint32_t baseArrayLayer)555 void downloadImage(const DeviceInterface &vkd, VkDevice device, uint32_t queueFamilyNdx, Allocator &allocator,
556                    VkImage image, MultiPlaneImageData *imageData, VkAccessFlags prevAccess, VkImageLayout initialLayout,
557                    uint32_t baseArrayLayer)
558 {
559     const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
560     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
561     const Unique<VkCommandBuffer> cmdBuffer(
562         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
563     vector<VkBufferSp> stagingBuffers;
564     vector<AllocationSp> stagingMemory;
565 
566     const PlanarFormatDescription &formatDesc = imageData->getDescription();
567 
568     allocateStagingBuffers(vkd, device, allocator, *imageData, &stagingBuffers, &stagingMemory);
569 
570     beginCommandBuffer(vkd, *cmdBuffer);
571 
572     for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
573     {
574         const VkImageAspectFlagBits aspect =
575             (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
576         {
577             const VkImageMemoryBarrier preCopyBarrier = {
578                 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
579                 nullptr,
580                 prevAccess,
581                 VK_ACCESS_TRANSFER_READ_BIT,
582                 initialLayout,
583                 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
584                 VK_QUEUE_FAMILY_IGNORED,
585                 VK_QUEUE_FAMILY_IGNORED,
586                 image,
587                 {static_cast<vk::VkImageAspectFlags>(aspect), 0u, 1u, baseArrayLayer, 1u}};
588 
589             vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
590                                    (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u,
591                                    nullptr, 0u, nullptr, 1u, &preCopyBarrier);
592         }
593         {
594             const VkExtent3D imageExtent = makeExtent3D(imageData->getSize().x(), imageData->getSize().y(), 1u);
595             const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
596             const VkBufferImageCopy copy = {0u, // bufferOffset
597                                             0u, // bufferRowLength
598                                             0u, // bufferImageHeight
599                                             {(VkImageAspectFlags)aspect, 0u, baseArrayLayer, 1u},
600                                             makeOffset3D(0u, 0u, 0u),
601                                             planeExtent};
602 
603             vkd.cmdCopyImageToBuffer(*cmdBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
604                                      **stagingBuffers[planeNdx], 1u, &copy);
605         }
606         {
607             const VkBufferMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
608                                                            nullptr,
609                                                            VK_ACCESS_TRANSFER_WRITE_BIT,
610                                                            VK_ACCESS_HOST_READ_BIT,
611                                                            VK_QUEUE_FAMILY_IGNORED,
612                                                            VK_QUEUE_FAMILY_IGNORED,
613                                                            **stagingBuffers[planeNdx],
614                                                            0u,
615                                                            VK_WHOLE_SIZE};
616 
617             vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
618                                    (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0u, 0u, nullptr,
619                                    1u, &postCopyBarrier, 0u, nullptr);
620         }
621     }
622 
623     endCommandBuffer(vkd, *cmdBuffer);
624 
625     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
626 
627     readStagingBuffers(imageData, vkd, device, stagingMemory);
628 }
629 
readImageMemory(const vk::DeviceInterface & vkd,vk::VkDevice device,uint32_t queueFamilyNdx,vk::VkImage image,const std::vector<de::SharedPtr<vk::Allocation>> & allocations,MultiPlaneImageData * imageData,vk::VkAccessFlags prevAccess,vk::VkImageLayout initialLayout)630 void readImageMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t queueFamilyNdx, vk::VkImage image,
631                      const std::vector<de::SharedPtr<vk::Allocation>> &allocations, MultiPlaneImageData *imageData,
632                      vk::VkAccessFlags prevAccess, vk::VkImageLayout initialLayout)
633 {
634     const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
635     const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
636     const Unique<VkCommandBuffer> cmdBuffer(
637         allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
638     const PlanarFormatDescription &formatDesc = imageData->getDescription();
639 
640     beginCommandBuffer(vkd, *cmdBuffer);
641 
642     {
643         const VkImageMemoryBarrier preCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
644                                                      nullptr,
645                                                      prevAccess,
646                                                      vk::VK_ACCESS_HOST_READ_BIT,
647                                                      initialLayout,
648                                                      VK_IMAGE_LAYOUT_GENERAL,
649                                                      VK_QUEUE_FAMILY_IGNORED,
650                                                      VK_QUEUE_FAMILY_IGNORED,
651                                                      image,
652                                                      {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
653 
654         vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
655                                (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0u, 0u, nullptr, 0u,
656                                nullptr, 1u, &preCopyBarrier);
657     }
658 
659     endCommandBuffer(vkd, *cmdBuffer);
660 
661     submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
662 
663     for (uint32_t planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
664     {
665         const VkImageAspectFlagBits aspect =
666             (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
667         const de::SharedPtr<Allocation> &allocation = allocations.size() > 1 ? allocations[planeNdx] : allocations[0];
668         const size_t planeSize                      = imageData->getPlaneSize(planeNdx);
669         const uint32_t planeH                = imageData->getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
670         const VkImageSubresource subresource = {
671             static_cast<vk::VkImageAspectFlags>(aspect),
672             0u,
673             0u,
674         };
675         VkSubresourceLayout layout;
676 
677         vkd.getImageSubresourceLayout(device, image, &subresource, &layout);
678 
679         invalidateMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
680 
681         for (uint32_t row = 0; row < planeH; ++row)
682         {
683             const size_t rowSize = planeSize / planeH;
684             const void *const srcPtr =
685                 ((const uint8_t *)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
686             void *const dstPtr = ((uint8_t *)imageData->getPlanePtr(planeNdx)) + row * rowSize;
687 
688             deMemcpy(dstPtr, srcPtr, rowSize);
689         }
690     }
691 }
692 
693 // ChannelAccess utilities
694 namespace
695 {
696 
//! Sign-extend a value that occupies the low \p bits bits of \p src to a full 32-bit signed integer.
//! \note Bits of \p src above the sign bit are expected to be zero; \p bits must be in [1, 32].
inline int32_t signExtend(uint32_t src, int bits)
{
    const uint32_t signMask = 1u << (bits - 1);

    // When the sign bit is set, fill everything from the sign bit upwards with ones.
    const uint32_t extension = (src & signMask) ? ~(signMask - 1u) : 0u;

    return (int32_t)(src | extension);
}
706 
//! Integer division of \p a by \p b, rounded towards positive infinity.
//! \note Written as quotient plus remainder test so that large \p a cannot overflow.
uint32_t divRoundUp(uint32_t a, uint32_t b)
{
    const uint32_t quotient  = a / b;
    const uint32_t remainder = a % b;

    return (remainder != 0u) ? (quotient + 1) : quotient;
}
714 
715 // \todo Taken from tcuTexture.cpp
716 // \todo [2011-09-21 pyry] Move to tcutil?
717 template <typename T>
convertSatRte(float f)718 inline T convertSatRte(float f)
719 {
720     // \note Doesn't work for 64-bit types
721     DE_STATIC_ASSERT(sizeof(T) < sizeof(uint64_t));
722     DE_STATIC_ASSERT((-3 % 2 != 0) && (-4 % 2 == 0));
723 
724     int64_t minVal = std::numeric_limits<T>::min();
725     int64_t maxVal = std::numeric_limits<T>::max();
726     float q        = deFloatFrac(f);
727     int64_t intVal = (int64_t)(f - q);
728 
729     // Rounding.
730     if (q == 0.5f)
731     {
732         if (intVal % 2 != 0)
733             intVal++;
734     }
735     else if (q > 0.5f)
736         intVal++;
737     // else Don't add anything
738 
739     // Saturate.
740     intVal = de::max(minVal, de::min(maxVal, intVal));
741 
742     return (T)intVal;
743 }
744 
745 } // namespace
746 
ChannelAccess(tcu::TextureChannelClass channelClass,uint8_t channelSize,const tcu::IVec3 & size,const tcu::IVec3 & bitPitch,void * data,uint32_t bitOffset)747 ChannelAccess::ChannelAccess(tcu::TextureChannelClass channelClass, uint8_t channelSize, const tcu::IVec3 &size,
748                              const tcu::IVec3 &bitPitch, void *data, uint32_t bitOffset)
749     : m_channelClass(channelClass)
750     , m_channelSize(channelSize)
751     , m_size(size)
752     , m_bitPitch(bitPitch)
753     , m_data((uint8_t *)data + (bitOffset / 8))
754     , m_bitOffset(bitOffset % 8)
755 {
756 }
757 
getChannelUint(const tcu::IVec3 & pos) const758 uint32_t ChannelAccess::getChannelUint(const tcu::IVec3 &pos) const
759 {
760     DE_ASSERT(pos[0] < m_size[0]);
761     DE_ASSERT(pos[1] < m_size[1]);
762     DE_ASSERT(pos[2] < m_size[2]);
763 
764     const int32_t bitOffset(m_bitOffset + tcu::dot(m_bitPitch, pos));
765     const uint8_t *const firstByte = ((const uint8_t *)m_data) + (bitOffset / 8);
766     const uint32_t byteCount       = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
767     const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
768     const uint32_t offset = bitOffset % 8;
769     uint32_t bits         = 0u;
770 
771     deMemcpy(&bits, firstByte, byteCount);
772 
773     return (bits >> offset) & mask;
774 }
775 
setChannel(const tcu::IVec3 & pos,uint32_t x)776 void ChannelAccess::setChannel(const tcu::IVec3 &pos, uint32_t x)
777 {
778     DE_ASSERT(pos[0] < m_size[0]);
779     DE_ASSERT(pos[1] < m_size[1]);
780     DE_ASSERT(pos[2] < m_size[2]);
781 
782     const int32_t bitOffset(m_bitOffset + tcu::dot(m_bitPitch, pos));
783     uint8_t *const firstByte = ((uint8_t *)m_data) + (bitOffset / 8);
784     const uint32_t byteCount = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
785     const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
786     const uint32_t offset = bitOffset % 8;
787 
788     const uint32_t bits = (x & mask) << offset;
789     uint32_t oldBits    = 0;
790 
791     deMemcpy(&oldBits, firstByte, byteCount);
792 
793     {
794         const uint32_t newBits = bits | (oldBits & (~(mask << offset)));
795 
796         deMemcpy(firstByte, &newBits, byteCount);
797     }
798 }
799 
getChannel(const tcu::IVec3 & pos) const800 float ChannelAccess::getChannel(const tcu::IVec3 &pos) const
801 {
802     const uint32_t bits(getChannelUint(pos));
803 
804     switch (m_channelClass)
805     {
806     case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
807         return (float)bits / (float)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u));
808 
809     case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
810         return (float)bits;
811 
812     case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
813         return de::max(-1.0f, (float)signExtend(bits, m_channelSize) / (float)((0x1u << (m_channelSize - 1u)) - 1u));
814 
815     case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
816         return (float)signExtend(bits, m_channelSize);
817 
818     case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
819         if (m_channelSize == 32)
820             return tcu::Float32(bits).asFloat();
821         else
822         {
823             DE_FATAL("Float type not supported");
824             return -1.0f;
825         }
826 
827     default:
828         DE_FATAL("Unknown texture channel class");
829         return -1.0f;
830     }
831 }
832 
//! Read the channel at \p pos and convert it to an interval, rounding every intermediate
//! result outwards with \p conversionFormat.
//!
//! Mirrors the float overload: fixed-point classes are normalised by the largest representable
//! code (both bounds of a signed result are limited to no less than -1), integers are converted
//! directly, and floating point is only supported for 32-bit channels.
tcu::Interval ChannelAccess::getChannel(const tcu::FloatFormat &conversionFormat, const tcu::IVec3 &pos) const
{
    const uint32_t raw = getChannelUint(pos);

    switch (m_channelClass)
    {
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
        return conversionFormat.roundOut((double)raw, false);

    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
        return conversionFormat.roundOut((double)signExtend(raw, m_channelSize), false);

    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
    {
        const tcu::Interval numerator(conversionFormat.roundOut((double)raw, false));
        const tcu::Interval denominator(conversionFormat.roundOut(
            (double)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u)), false));

        return conversionFormat.roundOut(numerator / denominator, false);
    }

    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
    {
        const tcu::Interval numerator(conversionFormat.roundOut((double)signExtend(raw, m_channelSize), false));
        const tcu::Interval denominator(
            conversionFormat.roundOut((double)((0x1u << (m_channelSize - 1u)) - 1u), false));
        const tcu::Interval normalized(conversionFormat.roundOut(numerator / denominator, false));

        // The most negative code would map slightly below -1, hence the lower limit on both bounds.
        return tcu::Interval(de::max(-1.0, normalized.lo()), de::max(-1.0, normalized.hi()));
    }

    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
        if (m_channelSize != 32)
        {
            DE_FATAL("Float type not supported");
            return tcu::Interval();
        }
        return conversionFormat.roundOut(tcu::Float32(raw).asFloat(), false);

    default:
        DE_FATAL("Unknown texture channel class");
        return tcu::Interval();
    }
}
876 
setChannel(const tcu::IVec3 & pos,float x)877 void ChannelAccess::setChannel(const tcu::IVec3 &pos, float x)
878 {
879     DE_ASSERT(pos[0] < m_size[0]);
880     DE_ASSERT(pos[1] < m_size[1]);
881     DE_ASSERT(pos[2] < m_size[2]);
882 
883     const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
884 
885     switch (m_channelClass)
886     {
887     case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
888     {
889         const uint32_t maxValue(mask);
890         const uint32_t value(de::min(maxValue, (uint32_t)convertSatRte<uint32_t>(x * (float)maxValue)));
891         setChannel(pos, value);
892         break;
893     }
894 
895     case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
896     {
897         const int32_t range((0x1u << (m_channelSize - 1u)) - 1u);
898         const uint32_t value((uint32_t)de::clamp<int32_t>(convertSatRte<int32_t>(x * (float)range), -range, range));
899         setChannel(pos, value);
900         break;
901     }
902 
903     case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
904     {
905         const uint32_t maxValue(mask);
906         const uint32_t value(de::min(maxValue, (uint32_t)x));
907         setChannel(pos, value);
908         break;
909     }
910 
911     case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
912     {
913         const int32_t minValue(-(int32_t)(1u << (m_channelSize - 1u)));
914         const int32_t maxValue((int32_t)((1u << (m_channelSize - 1u)) - 1u));
915         const uint32_t value((uint32_t)de::clamp((int32_t)x, minValue, maxValue));
916         setChannel(pos, value);
917         break;
918     }
919 
920     case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
921     {
922         if (m_channelSize == 32)
923         {
924             const uint32_t value = tcu::Float32(x).bits();
925             setChannel(pos, value);
926         }
927         else
928             DE_FATAL("Float type not supported");
929         break;
930     }
931 
932     default:
933         DE_FATAL("Unknown texture channel class");
934     }
935 }
936 
getChannelAccess(MultiPlaneImageData & data,const vk::PlanarFormatDescription & formatInfo,const UVec2 & size,int channelNdx)937 ChannelAccess getChannelAccess(MultiPlaneImageData &data, const vk::PlanarFormatDescription &formatInfo,
938                                const UVec2 &size, int channelNdx)
939 {
940     DE_ASSERT(formatInfo.hasChannelNdx(channelNdx));
941 
942     const uint32_t planeNdx         = formatInfo.channels[channelNdx].planeNdx;
943     const uint32_t valueOffsetBits  = formatInfo.channels[channelNdx].offsetBits;
944     const uint32_t pixelStrideBytes = formatInfo.channels[channelNdx].strideBytes;
945     const uint32_t pixelStrideBits  = pixelStrideBytes * 8;
946     const uint8_t sizeBits          = formatInfo.channels[channelNdx].sizeBits;
947 
948     DE_ASSERT(size.x() % (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor) == 0);
949     DE_ASSERT(size.y() % (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor) == 0);
950 
951     uint32_t accessWidth            = size.x() / (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor);
952     const uint32_t accessHeight     = size.y() / (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor);
953     const uint32_t elementSizeBytes = formatInfo.planes[planeNdx].elementSizeBytes;
954     const uint32_t rowPitch         = formatInfo.planes[planeNdx].elementSizeBytes * accessWidth;
955     const uint32_t rowPitchBits     = rowPitch * 8;
956 
957     if (pixelStrideBytes != elementSizeBytes)
958     {
959         DE_ASSERT(elementSizeBytes % pixelStrideBytes == 0);
960         accessWidth *= elementSizeBytes / pixelStrideBytes;
961     }
962 
963     return ChannelAccess((tcu::TextureChannelClass)formatInfo.channels[channelNdx].type, sizeBits,
964                          tcu::IVec3(accessWidth, accessHeight, 1u),
965                          tcu::IVec3((int)pixelStrideBits, (int)rowPitchBits, 0), data.getPlanePtr(planeNdx),
966                          (uint32_t)valueOffsetBits);
967 }
968 
isXChromaSubsampled(vk::VkFormat format)969 bool isXChromaSubsampled(vk::VkFormat format)
970 {
971     switch (format)
972     {
973     case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
974     case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
975     case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
976     case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
977     case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
978     case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
979     case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
980     case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
981     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
982     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
983     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
984     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
985     case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
986     case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
987     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
988     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
989     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
990     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
991     case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
992     case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
993     case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
994     case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
995     case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
996     case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
997         return true;
998 
999     default:
1000         return false;
1001     }
1002 }
1003 
isYChromaSubsampled(vk::VkFormat format)1004 bool isYChromaSubsampled(vk::VkFormat format)
1005 {
1006     switch (format)
1007     {
1008     case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1009     case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1010     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1011     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1012     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1013     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1014     case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1015     case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1016         return true;
1017 
1018     default:
1019         return false;
1020     }
1021 }
1022 
areLsb6BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)1023 bool areLsb6BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1024 {
1025     if ((srcFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) || (dstFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) ||
1026         (srcFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
1027         (dstFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
1028         (srcFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
1029         (dstFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
1030         (srcFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
1031         (dstFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
1032         (srcFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
1033         (dstFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
1034         (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
1035         (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
1036         (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
1037         (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
1038         (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
1039         (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
1040         (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
1041         (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
1042         (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16) ||
1043         (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16))
1044     {
1045         return true;
1046     }
1047 
1048     return false;
1049 }
1050 
areLsb4BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)1051 bool areLsb4BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1052 {
1053     if ((srcFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) || (dstFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) ||
1054         (srcFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1055         (dstFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1056         (srcFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1057         (dstFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1058         (srcFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1059         (dstFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1060         (srcFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1061         (dstFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1062         (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1063         (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1064         (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1065         (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1066         (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1067         (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1068         (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1069         (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1070         (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16) ||
1071         (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16))
1072     {
1073         return true;
1074     }
1075 
1076     return false;
1077 }
1078 
1079 // \note Used for range expansion
getYCbCrBitDepth(vk::VkFormat format)1080 tcu::UVec4 getYCbCrBitDepth(vk::VkFormat format)
1081 {
1082     switch (format)
1083     {
1084     case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1085     case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1086     case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1087     case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1088     case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1089     case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1090     case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
1091     case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
1092         return tcu::UVec4(8, 8, 8, 0);
1093 
1094     case vk::VK_FORMAT_R10X6_UNORM_PACK16:
1095         return tcu::UVec4(10, 0, 0, 0);
1096 
1097     case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
1098         return tcu::UVec4(10, 10, 0, 0);
1099 
1100     case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
1101         return tcu::UVec4(10, 10, 10, 10);
1102 
1103     case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1104     case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1105     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1106     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1107     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1108     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1109     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
1110     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
1111         return tcu::UVec4(10, 10, 10, 0);
1112 
1113     case vk::VK_FORMAT_R12X4_UNORM_PACK16:
1114         return tcu::UVec4(12, 0, 0, 0);
1115 
1116     case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
1117         return tcu::UVec4(12, 12, 0, 0);
1118 
1119     case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
1120     case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1121     case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1122     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1123     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1124     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1125     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1126     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
1127     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
1128         return tcu::UVec4(12, 12, 12, 12);
1129 
1130     case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1131     case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1132     case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1133     case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1134     case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1135     case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1136     case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
1137     case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
1138         return tcu::UVec4(16, 16, 16, 0);
1139 
1140     default:
1141         return tcu::getTextureFormatBitDepth(vk::mapVkFormat(format)).cast<uint32_t>();
1142     }
1143 }
1144 
getPrecision(VkFormat format)1145 std::vector<tcu::FloatFormat> getPrecision(VkFormat format)
1146 {
1147     std::vector<FloatFormat> floatFormats;
1148     UVec4 channelDepth = getYCbCrBitDepth(format);
1149 
1150     for (uint32_t channelIdx = 0; channelIdx < 4; channelIdx++)
1151         floatFormats.push_back(tcu::FloatFormat(0, 0, channelDepth[channelIdx], false, tcu::YES));
1152 
1153     return floatFormats;
1154 }
1155 
getYCbCrFormatChannelCount(vk::VkFormat format)1156 uint32_t getYCbCrFormatChannelCount(vk::VkFormat format)
1157 {
1158     switch (format)
1159     {
1160     case vk::VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1161     case vk::VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1162     case vk::VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1163     case vk::VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1164     case vk::VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1165     case vk::VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1166     case vk::VK_FORMAT_B8G8R8A8_UNORM:
1167     case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
1168     case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
1169     case vk::VK_FORMAT_R16G16B16A16_UNORM:
1170     case vk::VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1171     case vk::VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1172     case vk::VK_FORMAT_R8G8B8A8_UNORM:
1173         return 4;
1174 
1175     case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1176     case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1177     case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1178     case vk::VK_FORMAT_B5G6R5_UNORM_PACK16:
1179     case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1180     case vk::VK_FORMAT_B8G8R8_UNORM:
1181     case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1182     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1183     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1184     case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
1185     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1186     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1187     case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
1188     case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1189     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1190     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1191     case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
1192     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1193     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1194     case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
1195     case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1196     case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1197     case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1198     case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
1199     case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1200     case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1201     case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
1202     case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1203     case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1204     case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1205     case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
1206     case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1207     case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1208     case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
1209     case vk::VK_FORMAT_R16G16B16_UNORM:
1210     case vk::VK_FORMAT_R5G6B5_UNORM_PACK16:
1211     case vk::VK_FORMAT_R8G8B8_UNORM:
1212         return 3;
1213 
1214     case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
1215     case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
1216         return 2;
1217 
1218     case vk::VK_FORMAT_R10X6_UNORM_PACK16:
1219     case vk::VK_FORMAT_R12X4_UNORM_PACK16:
1220         return 1;
1221 
1222     default:
1223         DE_FATAL("Unknown number of channels");
1224         return -1;
1225     }
1226 }
1227 
1228 // YCbCr color conversion utilities
1229 namespace
1230 {
1231 
rangeExpandChroma(vk::VkSamplerYcbcrRange range,const tcu::FloatFormat & conversionFormat,const uint32_t bits,const tcu::Interval & sample)1232 tcu::Interval rangeExpandChroma(vk::VkSamplerYcbcrRange range, const tcu::FloatFormat &conversionFormat,
1233                                 const uint32_t bits, const tcu::Interval &sample)
1234 {
1235     const uint32_t values(0x1u << bits);
1236 
1237     switch (range)
1238     {
1239     case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
1240         return conversionFormat.roundOut(
1241             sample - conversionFormat.roundOut(
1242                          tcu::Interval((double)(0x1u << (bits - 1u)) / (double)((0x1u << bits) - 1u)), false),
1243             false);
1244 
1245     case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
1246     {
1247         const tcu::Interval a(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
1248         const tcu::Interval dividend(
1249             conversionFormat.roundOut(a - tcu::Interval((double)(128u * (0x1u << (bits - 8u)))), false));
1250         const tcu::Interval divisor((double)(224u * (0x1u << (bits - 8u))));
1251         const tcu::Interval result(conversionFormat.roundOut(dividend / divisor, false));
1252 
1253         return result;
1254     }
1255 
1256     default:
1257         DE_FATAL("Unknown YCbCrRange");
1258         return tcu::Interval();
1259     }
1260 }
1261 
rangeExpandLuma(vk::VkSamplerYcbcrRange range,const tcu::FloatFormat & conversionFormat,const uint32_t bits,const tcu::Interval & sample)1262 tcu::Interval rangeExpandLuma(vk::VkSamplerYcbcrRange range, const tcu::FloatFormat &conversionFormat,
1263                               const uint32_t bits, const tcu::Interval &sample)
1264 {
1265     const uint32_t values(0x1u << bits);
1266 
1267     switch (range)
1268     {
1269     case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
1270         return conversionFormat.roundOut(sample, false);
1271 
1272     case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
1273     {
1274         const tcu::Interval a(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
1275         const tcu::Interval dividend(
1276             conversionFormat.roundOut(a - tcu::Interval((double)(16u * (0x1u << (bits - 8u)))), false));
1277         const tcu::Interval divisor((double)(219u * (0x1u << (bits - 8u))));
1278         const tcu::Interval result(conversionFormat.roundOut(dividend / divisor, false));
1279 
1280         return result;
1281     }
1282 
1283     default:
1284         DE_FATAL("Unknown YCbCrRange");
1285         return tcu::Interval();
1286     }
1287 }
1288 
//! Account for an optional clamp of \p x to [\p min, \p max].
//!
//! The input interval is never shrunk. If its lower bound is below \p min the result is
//! combined (operator|) with \p min, and likewise with \p max when the upper bound exceeds it,
//! so both the clamped and the unclamped value end up inside the result.
tcu::Interval clampMaybe(const tcu::Interval &x, double min, double max)
{
    DE_ASSERT(min <= max);

    const bool belowMin = x.lo() < min;
    const bool aboveMax = x.hi() > max;

    tcu::Interval widened(x);

    if (belowMin)
        widened = widened | tcu::Interval(min);

    if (aboveMax)
        widened = widened | tcu::Interval(max);

    return widened;
}
1303 
convertColor(vk::VkSamplerYcbcrModelConversion colorModel,vk::VkSamplerYcbcrRange range,const vector<tcu::FloatFormat> & conversionFormat,const tcu::UVec4 & bitDepth,const tcu::Interval input[4],tcu::Interval output[4])1304 void convertColor(vk::VkSamplerYcbcrModelConversion colorModel, vk::VkSamplerYcbcrRange range,
1305                   const vector<tcu::FloatFormat> &conversionFormat, const tcu::UVec4 &bitDepth,
1306                   const tcu::Interval input[4], tcu::Interval output[4])
1307 {
1308     switch (colorModel)
1309     {
1310     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
1311     {
1312         for (size_t ndx = 0; ndx < 4; ndx++)
1313             output[ndx] = input[ndx];
1314         break;
1315     }
1316 
1317     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
1318     {
1319         output[0] = clampMaybe(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]), -0.5, 0.5);
1320         output[1] = clampMaybe(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]), 0.0, 1.0);
1321         output[2] = clampMaybe(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]), -0.5, 0.5);
1322         output[3] = input[3];
1323         break;
1324     }
1325 
1326     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
1327     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
1328     case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
1329     {
1330         const tcu::Interval y(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]));
1331         const tcu::Interval cr(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]));
1332         const tcu::Interval cb(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]));
1333 
1334         const tcu::Interval yClamped(clampMaybe(y, 0.0, 1.0));
1335         const tcu::Interval crClamped(clampMaybe(cr, -0.5, 0.5));
1336         const tcu::Interval cbClamped(clampMaybe(cb, -0.5, 0.5));
1337 
1338         if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601)
1339         {
1340             output[0] =
1341                 conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.402 * crClamped, false), false);
1342             output[1] = conversionFormat[1].roundOut(
1343                 conversionFormat[1].roundOut(
1344                     yClamped - conversionFormat[1].roundOut((0.202008 / 0.587) * cbClamped, false), false) -
1345                     conversionFormat[1].roundOut((0.419198 / 0.587) * crClamped, false),
1346                 false);
1347             output[2] =
1348                 conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.772 * cbClamped, false), false);
1349         }
1350         else if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709)
1351         {
1352             output[0] =
1353                 conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.5748 * crClamped, false), false);
1354             output[1] = conversionFormat[1].roundOut(
1355                 conversionFormat[1].roundOut(
1356                     yClamped - conversionFormat[1].roundOut((0.13397432 / 0.7152) * cbClamped, false), false) -
1357                     conversionFormat[1].roundOut((0.33480248 / 0.7152) * crClamped, false),
1358                 false);
1359             output[2] =
1360                 conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8556 * cbClamped, false), false);
1361         }
1362         else
1363         {
1364             output[0] =
1365                 conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.4746 * crClamped, false), false);
1366             output[1] = conversionFormat[1].roundOut(
1367                 conversionFormat[1].roundOut(
1368                     yClamped - conversionFormat[1].roundOut(
1369                                    conversionFormat[1].roundOut(0.11156702 / 0.6780, false) * cbClamped, false),
1370                     false) -
1371                     conversionFormat[1].roundOut(conversionFormat[1].roundOut(0.38737742 / 0.6780, false) * crClamped,
1372                                                  false),
1373                 false);
1374             output[2] =
1375                 conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8814 * cbClamped, false), false);
1376         }
1377         output[3] = input[3];
1378         break;
1379     }
1380 
1381     default:
1382         DE_FATAL("Unknown YCbCrModel");
1383     }
1384 
1385     if (colorModel != vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
1386     {
1387         for (int ndx = 0; ndx < 3; ndx++)
1388             output[ndx] = clampMaybe(output[ndx], 0.0, 1.0);
1389     }
1390 }
1391 
// Reflects negative coordinates around -0.5: -1 maps to 0, -2 to 1 and so on.
// Non-negative coordinates are returned unchanged.
int mirror(int coord)
{
    if (coord < 0)
        return -(coord + 1);

    return coord;
}
1396 
// Remainder of a / b, shifted up by b when the C++ remainder is negative, so that a
// negative a yields a result in [0, b) for a positive b.
int imod(int a, int b)
{
    const int remainder = a % b;

    if (remainder < 0)
        return remainder + b;

    return remainder;
}
1402 
// Fractional part of an interval.
//
// An interval that is at least 1.0 wide yields [0, 1]. Otherwise the result is built from
// the fractional parts of the two end points of x.
tcu::Interval frac(const tcu::Interval &x)
{
    const bool coversFullUnit = (x.hi() - x.lo()) >= 1.0;

    if (coversFullUnit)
        return tcu::Interval(0.0, 1.0);

    return tcu::Interval(deFrac(x.lo()), deFrac(x.hi()));
}
1414 
// Scales a normalized coordinate interval by the image size, rounding outwards to
// coordFormat both before and after the multiplication.
tcu::Interval calculateUV(const tcu::FloatFormat &coordFormat, const tcu::Interval &st, const int size)
{
    const tcu::Interval roundedSt(coordFormat.roundOut(st, false));
    const tcu::Interval scaled(roundedSt * tcu::Interval((double)size));

    return coordFormat.roundOut(scaled, false);
}
1419 
// Calculate range of texel coordinates that nearest filtering may select for uv.
//
// uv is rounded to coordFormat, shifted by -0.5 and rounded again; both ends are then pushed
// outwards by one more coordFormat ULP before being rounded to integers.
tcu::IVec2 calculateNearestIJRange(const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
{
    const tcu::Interval roundedUv(coordFormat.roundOut(uv, false));
    const tcu::Interval ij(coordFormat.roundOut(roundedUv - tcu::Interval(0.5), false));

    const double lowest  = ij.lo() - coordFormat.ulp(ij.lo(), 1);
    const double highest = ij.hi() + coordFormat.ulp(ij.hi(), 1);

    return tcu::IVec2(deRoundToInt32(lowest), deRoundToInt32(highest));
}
1427 
1428 // Calculate range of pixel coordinates that can be used as lower coordinate for linear sampling
calculateLinearIJRange(const tcu::FloatFormat & coordFormat,const tcu::Interval & uv)1429 tcu::IVec2 calculateLinearIJRange(const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
1430 {
1431     const tcu::Interval ij(coordFormat.roundOut(uv - tcu::Interval(0.5), false));
1432 
1433     return tcu::IVec2(deFloorToInt32(ij.lo()), deFloorToInt32(ij.hi()));
1434 }
1435 
calculateIJRange(vk::VkFilter filter,const tcu::FloatFormat & coordFormat,const tcu::Interval & uv)1436 tcu::IVec2 calculateIJRange(vk::VkFilter filter, const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
1437 {
1438     DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1439     return (filter == vk::VK_FILTER_LINEAR) ? calculateLinearIJRange(coordFormat, uv) :
1440                                               calculateNearestIJRange(coordFormat, uv);
1441 }
1442 
// Computes the interval of linear filtering weights for the texel pair starting at ij.
//
// The part of (uv - 0.5) that lies within [ij, ij + 1] is reduced to its fractional part and
// then snapped outwards onto a grid of 2^subTexelPrecisionBits steps: the lower end is
// floored, the upper end is ceiled, and the result is limited to [0, 1].
tcu::Interval calculateAB(const uint32_t subTexelPrecisionBits, const tcu::Interval &uv, int ij)
{
    const uint32_t gridSteps = 0x1u << subTexelPrecisionBits;

    const tcu::Interval texelSpan((double)ij, (double)(ij + 1));
    const tcu::Interval weight(frac((uv - 0.5) & texelSpan));
    const tcu::Interval onGrid(weight * tcu::Interval(gridSteps));

    const double snappedLo = deFloor(onGrid.lo()) / gridSteps;
    const double snappedHi = deCeil(onGrid.hi()) / gridSteps;

    return tcu::Interval(de::max(snappedLo, 0.0), de::min(snappedHi, 1.0));
}
1453 
// Fetches the channel interval of the texel at coord after applying the address modes to
// each axis separately.
tcu::Interval lookupWrapped(const ChannelAccess &access, const tcu::FloatFormat &conversionFormat,
                            vk::VkSamplerAddressMode addressModeU, vk::VkSamplerAddressMode addressModeV,
                            const tcu::IVec2 &coord)
{
    const int wrappedX = wrap(addressModeU, coord.x(), access.getSize().x());
    const int wrappedY = wrap(addressModeV, coord.y(), access.getSize().y());

    tcu::Interval texel = access.getChannel(conversionFormat, tcu::IVec3(wrappedX, wrappedY, 0));

    // Expand range for 10-bit conversions to +/-1.0 ULP
    if (conversionFormat.getFractionBits() == 10)
    {
        const double loweredEnd = texel.lo() - texel.length() / 2.0;
        texel |= loweredEnd;

        // Evaluated after the lower end has moved, so length() here is the widened length.
        const double raisedEnd = texel.hi() + texel.length() / 2.0;
        texel |= raisedEnd;
    }

    return texel;
}
1471 
// Bilinear blend of four texel intervals.
//
// a is the weight towards the "1" column (p10, p11) and b the weight towards the "1" row
// (p01, p11). Every intermediate value (the per-axis weights, their product, each weighted
// texel and the running sum) is rounded outwards to filteringFormat. Texels are accumulated
// in the order p00, p10, p01, p11.
tcu::Interval linearInterpolate(const tcu::FloatFormat &filteringFormat, const tcu::Interval &a, const tcu::Interval &b,
                                const tcu::Interval &p00, const tcu::Interval &p10, const tcu::Interval &p01,
                                const tcu::Interval &p11)
{
    const tcu::Interval columnWeights[2] = {filteringFormat.roundOut(1.0 - a, false),
                                            filteringFormat.roundOut(a, false)};
    const tcu::Interval rowWeights[2]    = {filteringFormat.roundOut(1.0 - b, false),
                                            filteringFormat.roundOut(b, false)};
    const tcu::Interval *const texels[4] = {&p00, &p10, &p01, &p11};

    tcu::Interval sum(0.0);

    for (int row = 0; row < 2; row++)
    {
        for (int column = 0; column < 2; column++)
        {
            const tcu::Interval weight(filteringFormat.roundOut(columnWeights[column] * rowWeights[row], false));
            const tcu::Interval term(filteringFormat.roundOut(*texels[row * 2 + column] * weight, false));

            sum = filteringFormat.roundOut(sum + term, false);
        }
    }

    return sum;
}
1490 
// Maps an unnormalized luma coordinate to the chroma plane of a 2x subsampled axis.
//
// COSITED_EVEN yields 0.5 * (uv + 0.5); any other chroma location yields 0.5 * uv. Each
// arithmetic step is rounded outwards to coordFormat.
tcu::Interval calculateImplicitChromaUV(const tcu::FloatFormat &coordFormat, vk::VkChromaLocation offset,
                                        const tcu::Interval &uv)
{
    if (offset != vk::VK_CHROMA_LOCATION_COSITED_EVEN)
        return coordFormat.roundOut(0.5 * uv, false);

    const tcu::Interval shifted(coordFormat.roundOut(uv + 0.5, false));

    return coordFormat.roundOut(0.5 * shifted, false);
}
1499 
// Linearly filters the 2x2 texel footprint whose lower corner is coord, using a and b as the
// horizontal and vertical weights. All four texels go through the address modes.
tcu::Interval linearSample(const ChannelAccess &access, const tcu::FloatFormat &conversionFormat,
                           const tcu::FloatFormat &filteringFormat, vk::VkSamplerAddressMode addressModeU,
                           vk::VkSamplerAddressMode addressModeV, const tcu::IVec2 &coord, const tcu::Interval &a,
                           const tcu::Interval &b)
{
    const tcu::Interval texel00(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 0)));
    const tcu::Interval texel10(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 0)));
    const tcu::Interval texel01(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 1)));
    const tcu::Interval texel11(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 1)));

    return linearInterpolate(filteringFormat, a, b, texel00, texel10, texel01, texel11);
}
1512 
// Explicit linear chroma reconstruction for formats subsampled in X only.
//
// Returns the chroma value at luma texel (i, j) as a blend of the two chroma texels subI and
// subI + 1 of row j. The blend follows the same geometry as calculateImplicitChromaUV():
//
//   COSITED_EVEN, i even : chroma texel i/2 is co-sited with the luma texel -> weights 1 / 0
//   COSITED_EVEN, i odd  : half way between two chroma texels               -> weights 0.5 / 0.5
//   otherwise,    i even : lower texel is floor(i/2) - 1                    -> weights 0.25 / 0.75
//   otherwise,    i odd  : lower texel is floor(i/2)                        -> weights 0.75 / 0.25
//
// Both weighted texels and their sum are rounded outwards to filteringFormat.
tcu::Interval reconstructLinearXChromaSample(const tcu::FloatFormat &filteringFormat,
                                             const tcu::FloatFormat &conversionFormat, vk::VkChromaLocation offset,
                                             vk::VkSamplerAddressMode addressModeU,
                                             vk::VkSamplerAddressMode addressModeV, const ChannelAccess &access, int i,
                                             int j)
{
    const bool cosited = offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN;
    const bool evenI   = (i % 2) == 0;

    // Lower of the two chroma texels that bracket luma texel i.
    const int subI = (!cosited && evenI) ? divFloor(i, 2) - 1 : divFloor(i, 2);

    // Weight of the upper texel (subI + 1); the lower texel gets the remainder. The previous
    // code gave the co-sited case (COSITED_EVEN, even i) a lower weight of 0.0, which selected
    // texel subI + 1 instead of the co-sited texel subI.
    const double upperWeight = cosited ? (evenI ? 0.0 : 0.5) : (evenI ? 0.75 : 0.25);
    const double lowerWeight = 1.0 - upperWeight;

    const tcu::Interval lower(filteringFormat.roundOut(
        lowerWeight * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI, j)),
        false));
    const tcu::Interval upper(filteringFormat.roundOut(
        upperWeight * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI + 1, j)),
        false));

    return filteringFormat.roundOut(lower + upper, false);
}
1531 
// Explicit linear chroma reconstruction for formats subsampled in both X and Y.
//
// Returns the chroma value at luma texel (i, j) by bilinearly filtering the 2x2 chroma
// footprint whose lower corner is (subI, subJ). The values handed to linearSample() are
// fractions towards the upper texel on each axis, i.e. the lower texel is weighted by
// (1 - fraction). Per axis, with n being i or j:
//
//   COSITED_EVEN, n even : lower texel floor(n/2),     fraction 0.0  (co-sited)
//   COSITED_EVEN, n odd  : lower texel floor(n/2),     fraction 0.5
//   otherwise,    n even : lower texel floor(n/2) - 1, fraction 0.75
//   otherwise,    n odd  : lower texel floor(n/2),     fraction 0.25
//
// This matches the geometry of calculateImplicitChromaUV(). The previous code used 0.25 / 0.75
// for the non-cosited rows, which are the lower-texel weights rather than the fractions and
// therefore swapped the two weights.
tcu::Interval reconstructLinearXYChromaSample(const tcu::FloatFormat &filteringFormat,
                                              const tcu::FloatFormat &conversionFormat, vk::VkChromaLocation xOffset,
                                              vk::VkChromaLocation yOffset, vk::VkSamplerAddressMode addressModeU,
                                              vk::VkSamplerAddressMode addressModeV, const ChannelAccess &access, int i,
                                              int j)
{
    const bool cositedX = xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN;
    const bool cositedY = yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN;
    const bool evenI    = (i % 2) == 0;
    const bool evenJ    = (j % 2) == 0;

    // Lower corner of the chroma footprint.
    const int subI = (!cositedX && evenI) ? divFloor(i, 2) - 1 : divFloor(i, 2);
    const int subJ = (!cositedY && evenJ) ? divFloor(j, 2) - 1 : divFloor(j, 2);

    // Fractional position of the luma texel between the lower and the upper chroma texel.
    const double a = cositedX ? (evenI ? 0.0 : 0.5) : (evenI ? 0.75 : 0.25);
    const double b = cositedY ? (evenJ ? 0.0 : 0.5) : (evenJ ? 0.75 : 0.25);

    return linearSample(access, conversionFormat, filteringFormat, addressModeU, addressModeV, tcu::IVec2(subI, subJ),
                        a, b);
}
1553 
swizzle(vk::VkComponentSwizzle swizzle,const ChannelAccess & identityPlane,const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane)1554 const ChannelAccess &swizzle(vk::VkComponentSwizzle swizzle, const ChannelAccess &identityPlane,
1555                              const ChannelAccess &rPlane, const ChannelAccess &gPlane, const ChannelAccess &bPlane,
1556                              const ChannelAccess &aPlane)
1557 {
1558     switch (swizzle)
1559     {
1560     case vk::VK_COMPONENT_SWIZZLE_IDENTITY:
1561         return identityPlane;
1562     case vk::VK_COMPONENT_SWIZZLE_R:
1563         return rPlane;
1564     case vk::VK_COMPONENT_SWIZZLE_G:
1565         return gPlane;
1566     case vk::VK_COMPONENT_SWIZZLE_B:
1567         return bPlane;
1568     case vk::VK_COMPONENT_SWIZZLE_A:
1569         return aPlane;
1570 
1571     default:
1572         DE_FATAL("Unsupported swizzle");
1573         return identityPlane;
1574     }
1575 }
1576 
1577 } // namespace
1578 
wrap(vk::VkSamplerAddressMode addressMode,int coord,int size)1579 int wrap(vk::VkSamplerAddressMode addressMode, int coord, int size)
1580 {
1581     switch (addressMode)
1582     {
1583     case vk::VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
1584         return (size - 1) - mirror(imod(coord, 2 * size) - size);
1585 
1586     case vk::VK_SAMPLER_ADDRESS_MODE_REPEAT:
1587         return imod(coord, size);
1588 
1589     case vk::VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
1590         return de::clamp(coord, 0, size - 1);
1591 
1592     case vk::VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
1593         return de::clamp(mirror(coord), 0, size - 1);
1594 
1595     default:
1596         DE_FATAL("Unknown wrap mode");
1597         return ~0;
1598     }
1599 }
1600 
// Integer division rounding towards negative infinity (floor(a / b)).
//
// C++ integer division truncates towards zero, so the truncated quotient is one too large
// whenever the division is inexact and the exact result is negative. That is the case
// exactly when the remainder is non-zero and its sign (which follows a) differs from the
// sign of b. Works for positive and negative divisors; b must not be zero.
int divFloor(int a, int b)
{
    const int quotient  = a / b;
    const int remainder = a % b;

    const bool inexact        = remainder != 0;
    const bool negativeResult = (remainder < 0) != (b < 0);

    return (inexact && negativeResult) ? quotient - 1 : quotient;
}
1610 
calculateBounds(const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane,const UVec4 & bitDepth,const vector<Vec2> & sts,const vector<FloatFormat> & filteringFormat,const vector<FloatFormat> & conversionFormat,const uint32_t subTexelPrecisionBits,vk::VkFilter filter,vk::VkSamplerYcbcrModelConversion colorModel,vk::VkSamplerYcbcrRange range,vk::VkFilter chromaFilter,vk::VkChromaLocation xChromaOffset,vk::VkChromaLocation yChromaOffset,const vk::VkComponentMapping & componentMapping,bool explicitReconstruction,vk::VkSamplerAddressMode addressModeU,vk::VkSamplerAddressMode addressModeV,std::vector<Vec4> & minBounds,std::vector<Vec4> & maxBounds,std::vector<Vec4> & uvBounds,std::vector<IVec4> & ijBounds)1611 void calculateBounds(const ChannelAccess &rPlane, const ChannelAccess &gPlane, const ChannelAccess &bPlane,
1612                      const ChannelAccess &aPlane, const UVec4 &bitDepth, const vector<Vec2> &sts,
1613                      const vector<FloatFormat> &filteringFormat, const vector<FloatFormat> &conversionFormat,
1614                      const uint32_t subTexelPrecisionBits, vk::VkFilter filter,
1615                      vk::VkSamplerYcbcrModelConversion colorModel, vk::VkSamplerYcbcrRange range,
1616                      vk::VkFilter chromaFilter, vk::VkChromaLocation xChromaOffset, vk::VkChromaLocation yChromaOffset,
1617                      const vk::VkComponentMapping &componentMapping, bool explicitReconstruction,
1618                      vk::VkSamplerAddressMode addressModeU, vk::VkSamplerAddressMode addressModeV,
1619                      std::vector<Vec4> &minBounds, std::vector<Vec4> &maxBounds, std::vector<Vec4> &uvBounds,
1620                      std::vector<IVec4> &ijBounds)
1621 {
1622     const FloatFormat highp(-126, 127, 23, true,
1623                             tcu::MAYBE,  // subnormals
1624                             tcu::YES,    // infinities
1625                             tcu::MAYBE); // NaN
1626     const FloatFormat coordFormat(-32, 32, 16, true);
1627     const ChannelAccess &rAccess(swizzle(componentMapping.r, rPlane, rPlane, gPlane, bPlane, aPlane));
1628     const ChannelAccess &gAccess(swizzle(componentMapping.g, gPlane, rPlane, gPlane, bPlane, aPlane));
1629     const ChannelAccess &bAccess(swizzle(componentMapping.b, bPlane, rPlane, gPlane, bPlane, aPlane));
1630     const ChannelAccess &aAccess(swizzle(componentMapping.a, aPlane, rPlane, gPlane, bPlane, aPlane));
1631 
1632     const bool subsampledX = gAccess.getSize().x() > rAccess.getSize().x();
1633     const bool subsampledY = gAccess.getSize().y() > rAccess.getSize().y();
1634 
1635     minBounds.resize(sts.size(), Vec4(TCU_INFINITY));
1636     maxBounds.resize(sts.size(), Vec4(-TCU_INFINITY));
1637 
1638     uvBounds.resize(sts.size(), Vec4(TCU_INFINITY, -TCU_INFINITY, TCU_INFINITY, -TCU_INFINITY));
1639     ijBounds.resize(sts.size(), IVec4(0x7FFFFFFF, -1 - 0x7FFFFFFF, 0x7FFFFFFF, -1 - 0x7FFFFFFF));
1640 
1641     // Chroma plane sizes must match
1642     DE_ASSERT(rAccess.getSize() == bAccess.getSize());
1643 
1644     // Luma plane sizes must match
1645     DE_ASSERT(gAccess.getSize() == aAccess.getSize());
1646 
1647     // Luma plane size must match chroma plane or be twice as big
1648     DE_ASSERT(rAccess.getSize().x() == gAccess.getSize().x() || 2 * rAccess.getSize().x() == gAccess.getSize().x());
1649     DE_ASSERT(rAccess.getSize().y() == gAccess.getSize().y() || 2 * rAccess.getSize().y() == gAccess.getSize().y());
1650 
1651     DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1652     DE_ASSERT(chromaFilter == vk::VK_FILTER_NEAREST || chromaFilter == vk::VK_FILTER_LINEAR);
1653     DE_ASSERT(subsampledX || !subsampledY);
1654 
1655     for (size_t ndx = 0; ndx < sts.size(); ndx++)
1656     {
1657         const Vec2 st(sts[ndx]);
1658         Interval bounds[4];
1659 
1660         const Interval u(calculateUV(coordFormat, st[0], gAccess.getSize().x()));
1661         const Interval v(calculateUV(coordFormat, st[1], gAccess.getSize().y()));
1662 
1663         uvBounds[ndx][0] = (float)u.lo();
1664         uvBounds[ndx][1] = (float)u.hi();
1665 
1666         uvBounds[ndx][2] = (float)v.lo();
1667         uvBounds[ndx][3] = (float)v.hi();
1668 
1669         const IVec2 iRange(calculateIJRange(filter, coordFormat, u));
1670         const IVec2 jRange(calculateIJRange(filter, coordFormat, v));
1671 
1672         ijBounds[ndx][0] = iRange[0];
1673         ijBounds[ndx][1] = iRange[1];
1674 
1675         ijBounds[ndx][2] = jRange[0];
1676         ijBounds[ndx][3] = jRange[1];
1677 
1678         for (int j = jRange.x(); j <= jRange.y(); j++)
1679             for (int i = iRange.x(); i <= iRange.y(); i++)
1680             {
1681                 if (filter == vk::VK_FILTER_NEAREST)
1682                 {
1683                     const Interval gValue(
1684                         lookupWrapped(gAccess, conversionFormat[1], addressModeU, addressModeV, IVec2(i, j)));
1685                     const Interval aValue(
1686                         lookupWrapped(aAccess, conversionFormat[3], addressModeU, addressModeV, IVec2(i, j)));
1687 
1688                     if (explicitReconstruction || !(subsampledX || subsampledY))
1689                     {
1690                         Interval rValue, bValue;
1691                         if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1692                         {
1693                             // Reconstruct using nearest if needed, otherwise, just take what's already there.
1694                             const int subI = subsampledX ? i / 2 : i;
1695                             const int subJ = subsampledY ? j / 2 : j;
1696                             rValue         = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1697                                                            IVec2(subI, subJ));
1698                             bValue         = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1699                                                            IVec2(subI, subJ));
1700                         }
1701                         else // vk::VK_FILTER_LINEAR
1702                         {
1703                             if (subsampledY)
1704                             {
1705                                 rValue = reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1706                                                                          xChromaOffset, yChromaOffset, addressModeU,
1707                                                                          addressModeV, rAccess, i, j);
1708                                 bValue = reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1709                                                                          xChromaOffset, yChromaOffset, addressModeU,
1710                                                                          addressModeV, bAccess, i, j);
1711                             }
1712                             else
1713                             {
1714                                 rValue = reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1715                                                                         xChromaOffset, addressModeU, addressModeV,
1716                                                                         rAccess, i, j);
1717                                 bValue = reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1718                                                                         xChromaOffset, addressModeU, addressModeV,
1719                                                                         bAccess, i, j);
1720                             }
1721                         }
1722 
1723                         const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1724                         Interval dstColor[4];
1725 
1726                         convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1727 
1728                         for (size_t compNdx = 0; compNdx < 4; compNdx++)
1729                             bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1730                     }
1731                     else
1732                     {
1733                         const Interval chromaU(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) :
1734                                                              u);
1735                         const Interval chromaV(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) :
1736                                                              v);
1737 
1738                         // Reconstructed chroma samples with implicit filtering
1739                         const IVec2 chromaIRange(subsampledX ? calculateIJRange(chromaFilter, coordFormat, chromaU) :
1740                                                                IVec2(i, i));
1741                         const IVec2 chromaJRange(subsampledY ? calculateIJRange(chromaFilter, coordFormat, chromaV) :
1742                                                                IVec2(j, j));
1743 
1744                         for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1745                             for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1746                             {
1747                                 Interval rValue, bValue;
1748 
1749                                 if (chromaFilter == vk::VK_FILTER_NEAREST)
1750                                 {
1751                                     rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1752                                                            IVec2(chromaI, chromaJ));
1753                                     bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1754                                                            IVec2(chromaI, chromaJ));
1755                                 }
1756                                 else // vk::VK_FILTER_LINEAR
1757                                 {
1758                                     const Interval chromaA(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1759                                     const Interval chromaB(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1760 
1761                                     rValue =
1762                                         linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU,
1763                                                      addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1764                                     bValue =
1765                                         linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU,
1766                                                      addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1767                                 }
1768 
1769                                 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1770 
1771                                 Interval dstColor[4];
1772                                 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1773 
1774                                 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1775                                     bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1776                             }
1777                     }
1778                 }
1779                 else // filter == vk::VK_FILTER_LINEAR
1780                 {
1781                     const Interval lumaA(calculateAB(subTexelPrecisionBits, u, i));
1782                     const Interval lumaB(calculateAB(subTexelPrecisionBits, v, j));
1783 
1784                     const Interval gValue(linearSample(gAccess, conversionFormat[1], filteringFormat[1], addressModeU,
1785                                                        addressModeV, IVec2(i, j), lumaA, lumaB));
1786                     const Interval aValue(linearSample(aAccess, conversionFormat[3], filteringFormat[3], addressModeU,
1787                                                        addressModeV, IVec2(i, j), lumaA, lumaB));
1788 
1789                     if (explicitReconstruction || !(subsampledX || subsampledY))
1790                     {
1791                         Interval rValue, bValue;
1792                         if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1793                         {
1794                             rValue = linearInterpolate(
1795                                 filteringFormat[0], lumaA, lumaB,
1796                                 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1797                                               IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1798                                 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1799                                               IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1800                                 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1801                                               IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1802                                 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1803                                               IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1804                             bValue = linearInterpolate(
1805                                 filteringFormat[2], lumaA, lumaB,
1806                                 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1807                                               IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1808                                 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1809                                               IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1810                                 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1811                                               IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1812                                 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1813                                               IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1814                         }
1815                         else // vk::VK_FILTER_LINEAR
1816                         {
1817                             if (subsampledY)
1818                             {
1819                                 // Linear, Reconstructed xx chroma samples with explicit linear filtering
1820                                 rValue = linearInterpolate(
1821                                     filteringFormat[0], lumaA, lumaB,
1822                                     reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1823                                                                     xChromaOffset, yChromaOffset, addressModeU,
1824                                                                     addressModeV, rAccess, i, j),
1825                                     reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1826                                                                     xChromaOffset, yChromaOffset, addressModeU,
1827                                                                     addressModeV, rAccess, i + 1, j),
1828                                     reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1829                                                                     xChromaOffset, yChromaOffset, addressModeU,
1830                                                                     addressModeV, rAccess, i, j + 1),
1831                                     reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1832                                                                     xChromaOffset, yChromaOffset, addressModeU,
1833                                                                     addressModeV, rAccess, i + 1, j + 1));
1834                                 bValue = linearInterpolate(
1835                                     filteringFormat[2], lumaA, lumaB,
1836                                     reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1837                                                                     xChromaOffset, yChromaOffset, addressModeU,
1838                                                                     addressModeV, bAccess, i, j),
1839                                     reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1840                                                                     xChromaOffset, yChromaOffset, addressModeU,
1841                                                                     addressModeV, bAccess, i + 1, j),
1842                                     reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1843                                                                     xChromaOffset, yChromaOffset, addressModeU,
1844                                                                     addressModeV, bAccess, i, j + 1),
1845                                     reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1846                                                                     xChromaOffset, yChromaOffset, addressModeU,
1847                                                                     addressModeV, bAccess, i + 1, j + 1));
1848                             }
1849                             else
1850                             {
1851                                 // Linear, Reconstructed x chroma samples with explicit linear filtering
1852                                 rValue = linearInterpolate(
1853                                     filteringFormat[0], lumaA, lumaB,
1854                                     reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1855                                                                    xChromaOffset, addressModeU, addressModeV, rAccess,
1856                                                                    i, j),
1857                                     reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1858                                                                    xChromaOffset, addressModeU, addressModeV, rAccess,
1859                                                                    i + 1, j),
1860                                     reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1861                                                                    xChromaOffset, addressModeU, addressModeV, rAccess,
1862                                                                    i, j + 1),
1863                                     reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1864                                                                    xChromaOffset, addressModeU, addressModeV, rAccess,
1865                                                                    i + 1, j + 1));
1866                                 bValue = linearInterpolate(
1867                                     filteringFormat[2], lumaA, lumaB,
1868                                     reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1869                                                                    xChromaOffset, addressModeU, addressModeV, bAccess,
1870                                                                    i, j),
1871                                     reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1872                                                                    xChromaOffset, addressModeU, addressModeV, bAccess,
1873                                                                    i + 1, j),
1874                                     reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1875                                                                    xChromaOffset, addressModeU, addressModeV, bAccess,
1876                                                                    i, j + 1),
1877                                     reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1878                                                                    xChromaOffset, addressModeU, addressModeV, bAccess,
1879                                                                    i + 1, j + 1));
1880                             }
1881                         }
1882 
1883                         const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1884                         Interval dstColor[4];
1885 
1886                         convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1887 
1888                         for (size_t compNdx = 0; compNdx < 4; compNdx++)
1889                             bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1890                     }
1891                     else
1892                     {
1893                         const Interval chromaU(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) :
1894                                                              u);
1895                         const Interval chromaV(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) :
1896                                                              v);
1897 
1898                         // TODO: It looks incorrect to ignore the chroma filter here. Is it?
1899                         const IVec2 chromaIRange(calculateNearestIJRange(coordFormat, chromaU));
1900                         const IVec2 chromaJRange(calculateNearestIJRange(coordFormat, chromaV));
1901 
1902                         for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1903                             for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1904                             {
1905                                 Interval rValue, bValue;
1906 
1907                                 if (chromaFilter == vk::VK_FILTER_NEAREST)
1908                                 {
1909                                     rValue = lookupWrapped(rAccess, conversionFormat[1], addressModeU, addressModeV,
1910                                                            IVec2(chromaI, chromaJ));
1911                                     bValue = lookupWrapped(bAccess, conversionFormat[3], addressModeU, addressModeV,
1912                                                            IVec2(chromaI, chromaJ));
1913                                 }
1914                                 else // vk::VK_FILTER_LINEAR
1915                                 {
1916                                     const Interval chromaA(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1917                                     const Interval chromaB(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1918 
1919                                     rValue =
1920                                         linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU,
1921                                                      addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1922                                     bValue =
1923                                         linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU,
1924                                                      addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1925                                 }
1926 
1927                                 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1928                                 Interval dstColor[4];
1929                                 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1930 
1931                                 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1932                                     bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1933                             }
1934                     }
1935                 }
1936             }
1937 
1938         minBounds[ndx] =
1939             Vec4((float)bounds[0].lo(), (float)bounds[1].lo(), (float)bounds[2].lo(), (float)bounds[3].lo());
1940         maxBounds[ndx] =
1941             Vec4((float)bounds[0].hi(), (float)bounds[1].hi(), (float)bounds[2].hi(), (float)bounds[3].hi());
1942     }
1943 }
1944 
1945 } // namespace ycbcr
1946 
1947 } // namespace vkt
1948