1 /*-------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2019 Google Inc.
6 * Copyright (c) 2019 The Khronos Group Inc.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 *//*!
21 * \file
22 * \brief YCbCr Test Utilities
23 *//*--------------------------------------------------------------------*/
24
25 #include "vktYCbCrUtil.hpp"
26
27 #include "vkQueryUtil.hpp"
28 #include "vkRefUtil.hpp"
29 #include "vkTypeUtil.hpp"
30 #include "vkCmdUtil.hpp"
31
32 #include "tcuTextureUtil.hpp"
33 #include "deMath.h"
34 #include "tcuFloat.hpp"
35 #include "tcuVector.hpp"
36 #include "tcuVectorUtil.hpp"
37
38 #include "deSTLUtil.hpp"
39 #include "deUniquePtr.hpp"
40
41 #include <limits>
42
43 namespace vkt
44 {
45 namespace ycbcr
46 {
47
48 using namespace vk;
49
50 using de::MovePtr;
51 using std::string;
52 using std::vector;
53 using tcu::FloatFormat;
54 using tcu::Interval;
55 using tcu::IVec2;
56 using tcu::IVec4;
57 using tcu::UVec2;
58 using tcu::UVec4;
59 using tcu::Vec2;
60 using tcu::Vec4;
61
62 // MultiPlaneImageData
63
// Creates CPU-side storage for every plane of the given planar format.
// Each plane's byte size is computed from the planar format description for the
// given image dimensions (mip level 0, buffer-image-copy offset granularity).
MultiPlaneImageData::MultiPlaneImageData(VkFormat format, const UVec2 &size)
    : m_format(format)
    , m_description(getPlanarFormatDescription(format))
    , m_size(size)
{
    for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
        m_planeData[planeNdx].resize(
            getPlaneSizeInBytes(m_description, size, planeNdx, 0, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY));
}
73
// Deep-copies another image: format, description, size and the byte contents
// of each plane that is actually in use (entries past numPlanes are left as-is).
MultiPlaneImageData::MultiPlaneImageData(const MultiPlaneImageData &other)
    : m_format(other.m_format)
    , m_description(other.m_description)
    , m_size(other.m_size)
{
    for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
        m_planeData[planeNdx] = other.m_planeData[planeNdx];
}
82
// Plane storage is released by the member containers; nothing to do explicitly.
MultiPlaneImageData::~MultiPlaneImageData(void)
{
}
86
// Returns a writable pixel-buffer view of a single channel of the image.
// Row pitches are derived from the per-plane element size and subsampled plane
// width, i.e. rows are assumed tightly packed.
tcu::PixelBufferAccess MultiPlaneImageData::getChannelAccess(uint32_t channelNdx)
{
    void *planePtrs[PlanarFormatDescription::MAX_PLANES];
    uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];

    for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
    {
        // Plane width in elements: image width divided by the format block
        // width and the plane's horizontal subsampling divisor.
        const uint32_t planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
        planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
        planePtrs[planeNdx] = &m_planeData[planeNdx][0];
    }

    return vk::getChannelAccess(m_description, m_size, planeRowPitches, planePtrs, channelNdx);
}
101
// Const overload of getChannelAccess(); identical pitch/pointer setup but
// returns a read-only view.
tcu::ConstPixelBufferAccess MultiPlaneImageData::getChannelAccess(uint32_t channelNdx) const
{
    const void *planePtrs[PlanarFormatDescription::MAX_PLANES];
    uint32_t planeRowPitches[PlanarFormatDescription::MAX_PLANES];

    for (uint32_t planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
    {
        // Same tightly-packed row pitch computation as the non-const overload.
        const uint32_t planeW = m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
        planeRowPitches[planeNdx] = m_description.planes[planeNdx].elementSizeBytes * planeW;
        planePtrs[planeNdx] = &m_planeData[planeNdx][0];
    }

    return vk::getChannelAccess(m_description, m_size, planeRowPitches, planePtrs, channelNdx);
}
116
117 // Misc utilities
118
119 namespace
120 {
121
// Creates one host-visible staging buffer per plane of 'imageData', sized for
// that plane, binds memory to it, and appends buffer/allocation to the output
// vectors in plane order. Buffers are usable as both transfer source and
// destination so they serve upload and download paths alike.
void allocateStagingBuffers(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
                            const MultiPlaneImageData &imageData, vector<VkBufferSp> *buffers,
                            vector<AllocationSp> *allocations)
{
    for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
    {
        const VkBufferCreateInfo bufferInfo = {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            nullptr,
            (VkBufferCreateFlags)0u,
            (VkDeviceSize)imageData.getPlaneSize(planeNdx),
            VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
            VK_SHARING_MODE_EXCLUSIVE,
            0u,
            nullptr,
        };
        Move<VkBuffer> buffer(createBuffer(vkd, device, &bufferInfo));
        MovePtr<Allocation> allocation(allocator.allocate(getBufferMemoryRequirements(vkd, device, *buffer),
                                                          MemoryRequirement::HostVisible | MemoryRequirement::Any));

        VK_CHECK(vkd.bindBufferMemory(device, *buffer, allocation->getMemory(), allocation->getOffset()));

        // Transfer ownership to the caller-provided vectors.
        buffers->push_back(VkBufferSp(new Unique<VkBuffer>(buffer)));
        allocations->push_back(AllocationSp(allocation.release()));
    }
}
148
// Allocates per-plane staging buffers (see allocateStagingBuffers) and copies
// each plane's CPU-side data into the mapped memory, flushing so the writes are
// visible to the device.
void allocateAndWriteStagingBuffers(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
                                    const MultiPlaneImageData &imageData, vector<VkBufferSp> *buffers,
                                    vector<AllocationSp> *allocations)
{
    allocateStagingBuffers(vkd, device, allocator, imageData, buffers, allocations);

    for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
    {
        deMemcpy((*allocations)[planeNdx]->getHostPtr(), imageData.getPlanePtr(planeNdx),
                 imageData.getPlaneSize(planeNdx));
        // Make the host writes available to non-coherent memory.
        flushMappedMemoryRange(vkd, device, (*allocations)[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
    }
}
162
// Copies each plane's staging-buffer contents back into 'imageData',
// invalidating the mapped range first so device writes are visible to the host.
// 'allocations' must be in plane order (as produced by allocateStagingBuffers).
void readStagingBuffers(MultiPlaneImageData *imageData, const DeviceInterface &vkd, VkDevice device,
                        const vector<AllocationSp> &allocations)
{
    for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
    {
        invalidateMappedMemoryRange(vkd, device, allocations[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
        deMemcpy(imageData->getPlanePtr(planeNdx), allocations[planeNdx]->getHostPtr(),
                 imageData->getPlaneSize(planeNdx));
    }
}
173
174 } // namespace
175
// Throws NotSupportedError unless the implementation supports YCbCr sampler
// conversion with the given format/tiling, and (when VK_IMAGE_CREATE_DISJOINT_BIT
// is requested) disjoint planes plus the extensions disjoint binding relies on.
void checkImageSupport(Context &context, VkFormat format, VkImageCreateFlags createFlags, VkImageTiling tiling)
{
    const bool disjoint = (createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0;
    const VkPhysicalDeviceSamplerYcbcrConversionFeatures features = context.getSamplerYcbcrConversionFeatures();

    if (features.samplerYcbcrConversion == VK_FALSE)
        TCU_THROW(NotSupportedError, "samplerYcbcrConversion is not supported");

    if (disjoint)
    {
        // Disjoint images are bound with vkBindImageMemory2/vkGetImageMemoryRequirements2.
        context.requireDeviceFunctionality("VK_KHR_bind_memory2");
        context.requireDeviceFunctionality("VK_KHR_get_memory_requirements2");
    }

    {
        const VkFormatProperties formatProperties =
            getPhysicalDeviceFormatProperties(context.getInstanceInterface(), context.getPhysicalDevice(), format);
        // Check the feature set matching the requested tiling.
        const VkFormatFeatureFlags featureFlags = tiling == VK_IMAGE_TILING_OPTIMAL ?
                                                      formatProperties.optimalTilingFeatures :
                                                      formatProperties.linearTilingFeatures;

        // At least one chroma location (midpoint or cosited) must be supported.
        if ((featureFlags &
             (VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)) == 0)
            TCU_THROW(NotSupportedError, "YCbCr conversion is not supported for format");

        if (disjoint && ((featureFlags & VK_FORMAT_FEATURE_DISJOINT_BIT) == 0))
            TCU_THROW(NotSupportedError, "Disjoint planes are not supported for format");
    }
}
205
// Extracts one I420 (planar 4:2:0: Y, then U, then V) frame from a raw video
// byte stream and writes it into 'imageData' as two planes: Y in plane 0 and
// interleaved U/V (NV12-style) in plane 1.
//
// \param videoDataPtr raw concatenated I420 frames (width*height*3/2 bytes each)
// \param frameNumber  zero-based index of the frame to extract
// \param width,height dimensions of the source frames
// \param imageData    destination; plane sizes must match width/height
//                     (halved when half_size is set) — TODO confirm: not checked here
// \param half_size    when true, performs a nearest-neighbor 2x downscale
void extractI420Frame(std::vector<uint8_t> &videoDataPtr, uint32_t frameNumber, uint32_t width, uint32_t height,
                      vkt::ycbcr::MultiPlaneImageData *imageData, bool half_size)
{
    uint32_t uOffset = width * height;                  // Y plane is width*height bytes
    uint32_t vOffset = uOffset + (uOffset / 4);         // U plane is a quarter of that
    uint32_t frameSize = uOffset + (uOffset / 2);       // total: 1.5 * width * height

    // Ensure the videoDataPtr is large enough for the requested frame
    if (videoDataPtr.size() < (frameNumber + 1) * frameSize)
    {
        TCU_THROW(NotSupportedError, "Video data pointer content is too small for requested frame");
    }

    const uint8_t *yPlane = videoDataPtr.data() + frameNumber * frameSize;
    const uint8_t *uPlane = videoDataPtr.data() + frameNumber * frameSize + uOffset;
    const uint8_t *vPlane = videoDataPtr.data() + frameNumber * frameSize + vOffset;

    uint8_t *yPlaneData = static_cast<uint8_t *>(imageData->getPlanePtr(0));
    uint8_t *uvPlaneData = static_cast<uint8_t *>(imageData->getPlanePtr(1));

    // If half_size is true, perform a simple 2x reduction
    if (half_size)
    {
        // Luma: keep every other sample in both directions.
        for (uint32_t j = 0; j < height; j += 2)
        {
            for (uint32_t i = 0; i < width; i += 2)
            {
                yPlaneData[(j / 2) * (width / 2) + (i / 2)] = yPlane[j * width + i];
            }
        }
        // Chroma: source planes are already width/2 x height/2; subsample again
        // and interleave as U,V pairs.
        for (uint32_t j = 0; j < height / 2; j += 2)
        {
            for (uint32_t i = 0; i < width / 2; i += 2)
            {
                uint32_t reducedIndex = (j / 2) * (width / 4) + (i / 2);
                uint32_t fullIndex = j * (width / 2) + i;

                uvPlaneData[2 * reducedIndex] = uPlane[fullIndex];
                uvPlaneData[2 * reducedIndex + 1] = vPlane[fullIndex];
            }
        }
    }
    else
    {
        // Writing NV12 frame
        uint32_t yPlaneSize = width * height;
        deMemcpy(yPlaneData, yPlane, yPlaneSize);

        // Interleave the separate U and V planes into a single UV plane.
        uint32_t uvPlaneSize = yPlaneSize / 2;
        for (uint32_t i = 0; i < uvPlaneSize; i += 2)
        {
            uvPlaneData[i] = uPlane[i / 2];
            uvPlaneData[i + 1] = vPlane[i / 2];
        }
    }
}
262
fillRandomNoNaN(de::Random * randomGen,uint8_t * const data,uint32_t size,const vk::VkFormat format)263 void fillRandomNoNaN(de::Random *randomGen, uint8_t *const data, uint32_t size, const vk::VkFormat format)
264 {
265 bool isFloat = false;
266 uint32_t stride = 1;
267
268 switch (format)
269 {
270 case vk::VK_FORMAT_B10G11R11_UFLOAT_PACK32:
271 isFloat = true;
272 stride = 1;
273 break;
274 case vk::VK_FORMAT_R16_SFLOAT:
275 case vk::VK_FORMAT_R16G16_SFLOAT:
276 case vk::VK_FORMAT_R16G16B16_SFLOAT:
277 case vk::VK_FORMAT_R16G16B16A16_SFLOAT:
278 isFloat = true;
279 stride = 2;
280 break;
281 case vk::VK_FORMAT_R32_SFLOAT:
282 case vk::VK_FORMAT_R32G32_SFLOAT:
283 case vk::VK_FORMAT_R32G32B32_SFLOAT:
284 case vk::VK_FORMAT_R32G32B32A32_SFLOAT:
285 isFloat = true;
286 stride = 4;
287 break;
288 case vk::VK_FORMAT_R64_SFLOAT:
289 case vk::VK_FORMAT_R64G64_SFLOAT:
290 case vk::VK_FORMAT_R64G64B64_SFLOAT:
291 case vk::VK_FORMAT_R64G64B64A64_SFLOAT:
292 isFloat = true;
293 stride = 8;
294 break;
295 default:
296 stride = 1;
297 break;
298 }
299
300 if (isFloat)
301 {
302 uint32_t ndx = 0;
303 for (; ndx < size - stride + 1; ndx += stride)
304 {
305 if (stride == 1)
306 {
307 // Set first bit of each channel to 0 to avoid NaNs, only format is B10G11R11
308 const uint8_t mask[] = {0x7F, 0xDF, 0xFB, 0xFF};
309 // Apply mask for both endians
310 data[ndx] = (randomGen->getUint8() & mask[ndx % 4]) & mask[3 - ndx % 4];
311 }
312 else if (stride == 2)
313 {
314 tcu::float16_t *const ptr = reinterpret_cast<tcu::float16_t *>(&data[ndx]);
315 *ptr = tcu::Float16(randomGen->getFloat()).bits();
316 }
317 else if (stride == 4)
318 {
319 float *ptr = reinterpret_cast<float *>(&data[ndx]);
320 *ptr = randomGen->getFloat();
321 }
322 else if (stride == 8)
323 {
324 double *ptr = reinterpret_cast<double *>(&data[ndx]);
325 *ptr = randomGen->getDouble();
326 }
327 }
328 while (ndx < size)
329 {
330 data[ndx] = 0;
331 }
332 }
333 else
334 {
335 for (uint32_t ndx = 0; ndx < size; ++ndx)
336 {
337 data[ndx] = randomGen->getUint8();
338 }
339 }
340 }
341
342 // When noNan is true, fillRandom does not generate NaNs in float formats.
fillRandom(de::Random * randomGen,MultiPlaneImageData * imageData,const vk::VkFormat format,const bool noNan)343 void fillRandom(de::Random *randomGen, MultiPlaneImageData *imageData, const vk::VkFormat format, const bool noNan)
344 {
345 for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
346 {
347 const size_t planeSize = imageData->getPlaneSize(planeNdx);
348 uint8_t *const planePtr = (uint8_t *)imageData->getPlanePtr(planeNdx);
349
350 if (noNan)
351 {
352 fillRandomNoNaN(randomGen, planePtr, (uint32_t)planeSize, format);
353 }
354 else
355 {
356 for (size_t ndx = 0; ndx < planeSize; ++ndx)
357 {
358 planePtr[ndx] = randomGen->getUint8();
359 }
360 }
361 }
362 }
363
// Fills each channel present in the image's format with a component gradient
// between minVal and maxVal. A temporary RGBA float texture is rendered with
// the gradient at the channel's (possibly subsampled) resolution and the
// matching component is copied into the channel.
void fillGradient(MultiPlaneImageData *imageData, const tcu::Vec4 &minVal, const tcu::Vec4 &maxVal)
{
    const PlanarFormatDescription &formatInfo = imageData->getDescription();

    // \todo [pyry] Optimize: no point in re-rendering source gradient for each channel.

    for (uint32_t channelNdx = 0; channelNdx < 4; channelNdx++)
    {
        if (formatInfo.hasChannelNdx(channelNdx))
        {
            const tcu::PixelBufferAccess channelAccess = imageData->getChannelAccess(channelNdx);
            tcu::TextureLevel tmpTexture(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT),
                                         channelAccess.getWidth(), channelAccess.getHeight());
            const tcu::ConstPixelBufferAccess tmpAccess = tmpTexture.getAccess();

            tcu::fillWithComponentGradients(tmpTexture, minVal, maxVal);

            // Copy only this channel's component out of the RGBA gradient.
            for (int y = 0; y < channelAccess.getHeight(); ++y)
                for (int x = 0; x < channelAccess.getWidth(); ++x)
                {
                    channelAccess.setPixel(tcu::Vec4(tmpAccess.getPixel(x, y)[channelNdx]), x, y);
                }
        }
    }
}
389
fillZero(MultiPlaneImageData * imageData)390 void fillZero(MultiPlaneImageData *imageData)
391 {
392 for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
393 deMemset(imageData->getPlanePtr(planeNdx), 0, imageData->getPlaneSize(planeNdx));
394 }
395
// Allocates and binds device memory for 'image'. For disjoint images each plane
// gets its own allocation (bound via bindImagePlanesMemory); otherwise a single
// allocation covers the whole image. Returns the allocations so the caller owns
// their lifetime.
vector<AllocationSp> allocateAndBindImageMemory(const DeviceInterface &vkd, VkDevice device, Allocator &allocator,
                                                VkImage image, VkFormat format, VkImageCreateFlags createFlags,
                                                vk::MemoryRequirement requirement)
{
    vector<AllocationSp> allocations;

    if ((createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0)
    {
        const uint32_t numPlanes = getPlaneCount(format);

        // One allocation per plane, bound with plane-aspect memory binding.
        bindImagePlanesMemory(vkd, device, image, numPlanes, allocations, allocator, requirement);
    }
    else
    {
        const VkMemoryRequirements reqs = getImageMemoryRequirements(vkd, device, image);

        allocations.push_back(AllocationSp(allocator.allocate(reqs, requirement).release()));

        VK_CHECK(vkd.bindImageMemory(device, image, allocations.back()->getMemory(), allocations.back()->getOffset()));
    }

    return allocations;
}
// NOTE(review): "Accept only NV12" looks stale — uploadImage below iterates over all
// planes of whatever planar format imageData describes; confirm whether an NV12-only
// restriction still applies to its callers.
// Uploads CPU-side plane data into a device image via per-plane staging buffers.
//
// For each plane: the image (plane aspect for multi-planar formats, color aspect
// otherwise) is transitioned UNDEFINED -> TRANSFER_DST_OPTIMAL, the staging
// buffer is copied in, and the plane is transitioned to 'finalLayout' with its
// writes made available to 'nextAccess'. The command buffer is submitted on
// queue family 'queueFamilyNdx' and this function waits for completion.
void uploadImage(const DeviceInterface &vkd, VkDevice device, uint32_t queueFamilyNdx, Allocator &allocator,
                 VkImage image, const MultiPlaneImageData &imageData, VkAccessFlags nextAccess,
                 VkImageLayout finalLayout, uint32_t arrayLayer)
{
    const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    vector<VkBufferSp> stagingBuffers;
    vector<AllocationSp> stagingMemory;

    const PlanarFormatDescription &formatDesc = imageData.getDescription();

    // Fill one host-visible staging buffer per plane with the source data.
    allocateAndWriteStagingBuffers(vkd, device, allocator, imageData, &stagingBuffers, &stagingMemory);

    beginCommandBuffer(vkd, *cmdBuffer);

    for (uint32_t planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
    {
        // Single-plane formats use the color aspect; planar formats use PLANE_n.
        const VkImageAspectFlagBits aspect =
            (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
        const VkExtent3D imageExtent = makeExtent3D(imageData.getSize().x(), imageData.getSize().y(), 1u);
        const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
        const VkBufferImageCopy copy = {0u, // bufferOffset
                                        0u, // bufferRowLength
                                        0u, // bufferImageHeight
                                        {(VkImageAspectFlags)aspect, 0u, arrayLayer, 1u},
                                        makeOffset3D(0u, 0u, 0u),
                                        planeExtent};

        {
            // Get this plane into TRANSFER_DST_OPTIMAL; previous contents are discarded.
            const VkImageMemoryBarrier preCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                                         nullptr,
                                                         (VkAccessFlags)0,
                                                         VK_ACCESS_TRANSFER_WRITE_BIT,
                                                         VK_IMAGE_LAYOUT_UNDEFINED,
                                                         VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                         VK_QUEUE_FAMILY_IGNORED,
                                                         VK_QUEUE_FAMILY_IGNORED,
                                                         image,
                                                         {(VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u}};

            vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
                                   (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u,
                                   nullptr, 0u, nullptr, 1u, &preCopyBarrier);
        }

        vkd.cmdCopyBufferToImage(*cmdBuffer, **stagingBuffers[planeNdx], image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                 1u, &copy);

        {
            // Transition to the caller-requested layout/access for subsequent use.
            const VkImageMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                                          nullptr,
                                                          VK_ACCESS_TRANSFER_WRITE_BIT,
                                                          nextAccess,
                                                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
                                                          finalLayout,
                                                          VK_QUEUE_FAMILY_IGNORED,
                                                          VK_QUEUE_FAMILY_IGNORED,
                                                          image,
                                                          {(VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u}};

            vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
                                   (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, (VkDependencyFlags)0u, 0u,
                                   nullptr, 0u, nullptr, 1u, &postCopyBarrier);
        }
    }

    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
}
492
// Writes CPU-side plane data directly into host-mapped image memory, then
// transitions the image from PREINITIALIZED to 'finalLayout' (writes made
// available to 'nextAccess').
//
// 'allocations' holds either one allocation for the whole image or one per
// plane (disjoint). Per-row copies honor the driver-reported row pitch from
// vkGetImageSubresourceLayout; the source rows are assumed tightly packed
// (rowSize = planeSize / planeH).
void fillImageMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t queueFamilyNdx, vk::VkImage image,
                     const std::vector<de::SharedPtr<vk::Allocation>> &allocations,
                     const MultiPlaneImageData &imageData, vk::VkAccessFlags nextAccess, vk::VkImageLayout finalLayout,
                     uint32_t arrayLayer)
{
    const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    const PlanarFormatDescription &formatDesc = imageData.getDescription();

    for (uint32_t planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
    {
        const VkImageAspectFlagBits aspect =
            (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
        // Disjoint images have one allocation per plane, otherwise all planes
        // share allocations[0].
        const de::SharedPtr<Allocation> &allocation = allocations.size() > 1 ? allocations[planeNdx] : allocations[0];
        const size_t planeSize = imageData.getPlaneSize(planeNdx);
        const uint32_t planeH = imageData.getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
        const VkImageSubresource subresource = {
            static_cast<vk::VkImageAspectFlags>(aspect),
            0u,
            arrayLayer,
        };
        VkSubresourceLayout layout;

        vkd.getImageSubresourceLayout(device, image, &subresource, &layout);

        // Copy row by row to respect the device's row pitch.
        for (uint32_t row = 0; row < planeH; ++row)
        {
            const size_t rowSize = planeSize / planeH;
            void *const dstPtr = ((uint8_t *)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
            const void *const srcPtr = ((const uint8_t *)imageData.getPlanePtr(planeNdx)) + row * rowSize;

            deMemcpy(dstPtr, srcPtr, rowSize);
        }
        flushMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
    }

    beginCommandBuffer(vkd, *cmdBuffer);

    {
        // Host writes to PREINITIALIZED memory become visible under finalLayout.
        const VkImageMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                                      nullptr,
                                                      0u,
                                                      nextAccess,
                                                      VK_IMAGE_LAYOUT_PREINITIALIZED,
                                                      finalLayout,
                                                      VK_QUEUE_FAMILY_IGNORED,
                                                      VK_QUEUE_FAMILY_IGNORED,
                                                      image,
                                                      {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, arrayLayer, 1u}};

        vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
                               (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, (VkDependencyFlags)0u, 0u,
                               nullptr, 0u, nullptr, 1u, &postCopyBarrier);
    }

    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
}
554
// Reads a device image back into 'imageData' via per-plane staging buffers.
//
// For each plane: the image is transitioned from 'initialLayout' (with source
// access 'prevAccess') to TRANSFER_SRC_OPTIMAL, copied into the staging buffer,
// and a buffer barrier makes the copy visible to host reads. After submit+wait
// the staging contents are copied into 'imageData'.
void downloadImage(const DeviceInterface &vkd, VkDevice device, uint32_t queueFamilyNdx, Allocator &allocator,
                   VkImage image, MultiPlaneImageData *imageData, VkAccessFlags prevAccess, VkImageLayout initialLayout,
                   uint32_t baseArrayLayer)
{
    const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    vector<VkBufferSp> stagingBuffers;
    vector<AllocationSp> stagingMemory;

    const PlanarFormatDescription &formatDesc = imageData->getDescription();

    allocateStagingBuffers(vkd, device, allocator, *imageData, &stagingBuffers, &stagingMemory);

    beginCommandBuffer(vkd, *cmdBuffer);

    for (uint32_t planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
    {
        const VkImageAspectFlagBits aspect =
            (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
        {
            // Make the plane readable by transfer.
            const VkImageMemoryBarrier preCopyBarrier = {
                VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                nullptr,
                prevAccess,
                VK_ACCESS_TRANSFER_READ_BIT,
                initialLayout,
                VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                VK_QUEUE_FAMILY_IGNORED,
                VK_QUEUE_FAMILY_IGNORED,
                image,
                {static_cast<vk::VkImageAspectFlags>(aspect), 0u, 1u, baseArrayLayer, 1u}};

            vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                                   (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0u, 0u,
                                   nullptr, 0u, nullptr, 1u, &preCopyBarrier);
        }
        {
            const VkExtent3D imageExtent = makeExtent3D(imageData->getSize().x(), imageData->getSize().y(), 1u);
            const VkExtent3D planeExtent = getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
            const VkBufferImageCopy copy = {0u, // bufferOffset
                                            0u, // bufferRowLength
                                            0u, // bufferImageHeight
                                            {(VkImageAspectFlags)aspect, 0u, baseArrayLayer, 1u},
                                            makeOffset3D(0u, 0u, 0u),
                                            planeExtent};

            vkd.cmdCopyImageToBuffer(*cmdBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
                                     **stagingBuffers[planeNdx], 1u, &copy);
        }
        {
            // Make the staging-buffer write visible to host reads.
            const VkBufferMemoryBarrier postCopyBarrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
                                                           nullptr,
                                                           VK_ACCESS_TRANSFER_WRITE_BIT,
                                                           VK_ACCESS_HOST_READ_BIT,
                                                           VK_QUEUE_FAMILY_IGNORED,
                                                           VK_QUEUE_FAMILY_IGNORED,
                                                           **stagingBuffers[planeNdx],
                                                           0u,
                                                           VK_WHOLE_SIZE};

            vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
                                   (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0u, 0u, nullptr,
                                   1u, &postCopyBarrier, 0u, nullptr);
        }
    }

    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

    readStagingBuffers(imageData, vkd, device, stagingMemory);
}
629
// Reads a host-mappable image's contents into 'imageData' directly through the
// mapped allocations (for linear-tiling images), without staging buffers.
//
// The image is first transitioned from 'initialLayout' to GENERAL with writes
// made visible to host reads; after submit+wait, each plane is copied row by
// row using the pitch from vkGetImageSubresourceLayout. Source rows in the
// image are read as tightly packed into imageData (rowSize = planeSize/planeH).
// \note Only array layer 0 is read (subresource.arrayLayer is hardcoded to 0).
void readImageMemory(const vk::DeviceInterface &vkd, vk::VkDevice device, uint32_t queueFamilyNdx, vk::VkImage image,
                     const std::vector<de::SharedPtr<vk::Allocation>> &allocations, MultiPlaneImageData *imageData,
                     vk::VkAccessFlags prevAccess, vk::VkImageLayout initialLayout)
{
    const VkQueue queue = getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
    const Unique<VkCommandPool> cmdPool(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
    const Unique<VkCommandBuffer> cmdBuffer(
        allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    const PlanarFormatDescription &formatDesc = imageData->getDescription();

    beginCommandBuffer(vkd, *cmdBuffer);

    {
        // Make device writes visible to the host and move the image to GENERAL
        // so its memory can be read through the mapping.
        const VkImageMemoryBarrier preCopyBarrier = {VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
                                                     nullptr,
                                                     prevAccess,
                                                     vk::VK_ACCESS_HOST_READ_BIT,
                                                     initialLayout,
                                                     VK_IMAGE_LAYOUT_GENERAL,
                                                     VK_QUEUE_FAMILY_IGNORED,
                                                     VK_QUEUE_FAMILY_IGNORED,
                                                     image,
                                                     {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};

        vkd.cmdPipelineBarrier(*cmdBuffer, (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                               (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0u, 0u, nullptr, 0u,
                               nullptr, 1u, &preCopyBarrier);
    }

    endCommandBuffer(vkd, *cmdBuffer);

    submitCommandsAndWait(vkd, device, queue, *cmdBuffer);

    for (uint32_t planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
    {
        const VkImageAspectFlagBits aspect =
            (formatDesc.numPlanes > 1) ? getPlaneAspect(planeNdx) : VK_IMAGE_ASPECT_COLOR_BIT;
        // Disjoint images have one allocation per plane, otherwise allocations[0]
        // covers the whole image.
        const de::SharedPtr<Allocation> &allocation = allocations.size() > 1 ? allocations[planeNdx] : allocations[0];
        const size_t planeSize = imageData->getPlaneSize(planeNdx);
        const uint32_t planeH = imageData->getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
        const VkImageSubresource subresource = {
            static_cast<vk::VkImageAspectFlags>(aspect),
            0u,
            0u,
        };
        VkSubresourceLayout layout;

        vkd.getImageSubresourceLayout(device, image, &subresource, &layout);

        invalidateMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);

        // Copy row by row to respect the device's row pitch.
        for (uint32_t row = 0; row < planeH; ++row)
        {
            const size_t rowSize = planeSize / planeH;
            const void *const srcPtr =
                ((const uint8_t *)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
            void *const dstPtr = ((uint8_t *)imageData->getPlanePtr(planeNdx)) + row * rowSize;

            deMemcpy(dstPtr, srcPtr, rowSize);
        }
    }
}
692
693 // ChannelAccess utilities
694 namespace
695 {
696
697 //! Extend < 32b signed integer to 32b
//! Sign-extend a value occupying the low 'bits' bits of 'src' to a full 32-bit
//! signed integer. Bits at and above position 'bits' are assumed clear on entry.
inline int32_t signExtend(uint32_t src, int bits)
{
    const uint32_t signBit = 1u << (bits - 1);

    // When the sign bit is set, fill everything above it with ones.
    if ((src & signBit) != 0u)
        src |= ~(signBit - 1u);

    return (int32_t)src;
}
706
// Integer division rounding up. Written as quotient + remainder check rather
// than (a + b - 1) / b so it cannot overflow for large 'a'.
uint32_t divRoundUp(uint32_t a, uint32_t b)
{
    const uint32_t quotient = a / b;
    return (a % b != 0u) ? quotient + 1u : quotient;
}
714
715 // \todo Taken from tcuTexture.cpp
716 // \todo [2011-09-21 pyry] Move to tcutil?
// \todo Taken from tcuTexture.cpp
// \todo [2011-09-21 pyry] Move to tcutil?
// Converts a float to integer type T with round-to-nearest-even and saturation
// to T's range. Only valid for integer types narrower than 64 bits.
template <typename T>
inline T convertSatRte(float f)
{
    // \note Doesn't work for 64-bit types
    DE_STATIC_ASSERT(sizeof(T) < sizeof(uint64_t));
    DE_STATIC_ASSERT((-3 % 2 != 0) && (-4 % 2 == 0));

    int64_t minVal = std::numeric_limits<T>::min();
    int64_t maxVal = std::numeric_limits<T>::max();
    // q is the fractional part; intVal the remaining whole part.
    // NOTE(review): assumes deFloatFrac returns the fraction in [0, 1) — confirm.
    float q = deFloatFrac(f);
    int64_t intVal = (int64_t)(f - q);

    // Rounding.
    if (q == 0.5f)
    {
        // Exactly halfway: round to the nearest even integer.
        if (intVal % 2 != 0)
            intVal++;
    }
    else if (q > 0.5f)
        intVal++;
    // else Don't add anything

    // Saturate.
    intVal = de::max(minVal, de::min(maxVal, intVal));

    return (T)intVal;
}
744
745 } // namespace
746
// Wraps raw (possibly non-byte-aligned) channel data for bit-level access.
// The base pointer is advanced to the containing byte and only the residual
// sub-byte offset (< 8) is kept in m_bitOffset.
ChannelAccess::ChannelAccess(tcu::TextureChannelClass channelClass, uint8_t channelSize, const tcu::IVec3 &size,
                             const tcu::IVec3 &bitPitch, void *data, uint32_t bitOffset)
    : m_channelClass(channelClass)
    , m_channelSize(channelSize)
    , m_size(size)
    , m_bitPitch(bitPitch)
    , m_data((uint8_t *)data + (bitOffset / 8))
    , m_bitOffset(bitOffset % 8)
{
}
757
// Returns the raw bits of the channel at 'pos'. The channel's bit position is
// m_bitOffset + dot(m_bitPitch, pos); the covering bytes are copied into a
// 32-bit word and the channel is shifted/masked out.
// NOTE(review): assumes the channel plus its sub-byte offset spans at most
// 4 bytes (i.e. 32-bit channels are byte-aligned) — confirm; byteCount > 4
// would overflow 'bits'.
uint32_t ChannelAccess::getChannelUint(const tcu::IVec3 &pos) const
{
    DE_ASSERT(pos[0] < m_size[0]);
    DE_ASSERT(pos[1] < m_size[1]);
    DE_ASSERT(pos[2] < m_size[2]);

    const int32_t bitOffset(m_bitOffset + tcu::dot(m_bitPitch, pos));
    const uint8_t *const firstByte = ((const uint8_t *)m_data) + (bitOffset / 8);
    const uint32_t byteCount = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
    const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
    const uint32_t offset = bitOffset % 8;
    uint32_t bits = 0u;

    deMemcpy(&bits, firstByte, byteCount);

    return (bits >> offset) & mask;
}
775
// Stores the low m_channelSize bits of 'x' into the channel at 'pos' using a
// read-modify-write of the covering bytes, leaving neighboring bits untouched.
void ChannelAccess::setChannel(const tcu::IVec3 &pos, uint32_t x)
{
    DE_ASSERT(pos[0] < m_size[0]);
    DE_ASSERT(pos[1] < m_size[1]);
    DE_ASSERT(pos[2] < m_size[2]);

    const int32_t bitOffset(m_bitOffset + tcu::dot(m_bitPitch, pos));
    uint8_t *const firstByte = ((uint8_t *)m_data) + (bitOffset / 8);
    const uint32_t byteCount = divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
    const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
    const uint32_t offset = bitOffset % 8;

    // New channel bits, shifted into position within the covering word.
    const uint32_t bits = (x & mask) << offset;
    uint32_t oldBits = 0;

    deMemcpy(&oldBits, firstByte, byteCount);

    {
        // Merge: keep everything outside the channel's mask, replace the rest.
        const uint32_t newBits = bits | (oldBits & (~(mask << offset)));

        deMemcpy(firstByte, &newBits, byteCount);
    }
}
799
// Returns the channel at 'pos' converted to float according to the channel
// class: UNORM normalized to [0,1], SNORM to [-1,1] (clamped at -1), integers
// cast directly, 32-bit floats reinterpreted from their bit pattern.
float ChannelAccess::getChannel(const tcu::IVec3 &pos) const
{
    const uint32_t bits(getChannelUint(pos));

    switch (m_channelClass)
    {
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
        return (float)bits / (float)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u));

    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
        return (float)bits;

    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
        // Clamp at -1.0 since the most negative code maps slightly below -1.
        return de::max(-1.0f, (float)signExtend(bits, m_channelSize) / (float)((0x1u << (m_channelSize - 1u)) - 1u));

    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
        return (float)signExtend(bits, m_channelSize);

    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
        if (m_channelSize == 32)
            return tcu::Float32(bits).asFloat();
        else
        {
            DE_FATAL("Float type not supported");
            return -1.0f;
        }

    default:
        DE_FATAL("Unknown texture channel class");
        return -1.0f;
    }
}
832
tcu::Interval ChannelAccess::getChannel(const tcu::FloatFormat &conversionFormat, const tcu::IVec3 &pos) const
{
    // Interval variant of getChannel(): decodes the raw channel bits like the
    // float variant, but performs every arithmetic step with outward rounding
    // in 'conversionFormat' so the returned interval bounds all values a
    // conforming implementation may produce.
    const uint32_t bits(getChannelUint(pos));

    switch (m_channelClass)
    {
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
        // UNORM: bits / (2^size - 1), each operation rounded outwards.
        return conversionFormat.roundOut(
            conversionFormat.roundOut((double)bits, false) /
                conversionFormat.roundOut((double)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u)),
                                          false),
            false);

    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
        return conversionFormat.roundOut((double)bits, false);

    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
    {
        // SNORM: signExtend(bits) / (2^(size-1) - 1), clamped below to -1.0.
        const tcu::Interval result(conversionFormat.roundOut(
            conversionFormat.roundOut((double)signExtend(bits, m_channelSize), false) /
                conversionFormat.roundOut((double)((0x1u << (m_channelSize - 1u)) - 1u), false),
            false));

        return tcu::Interval(de::max(-1.0, result.lo()), de::max(-1.0, result.hi()));
    }

    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
        return conversionFormat.roundOut((double)signExtend(bits, m_channelSize), false);

    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
        // Only 32-bit IEEE floats are supported.
        if (m_channelSize == 32)
            return conversionFormat.roundOut(tcu::Float32(bits).asFloat(), false);
        else
        {
            DE_FATAL("Float type not supported");
            return tcu::Interval();
        }

    default:
        DE_FATAL("Unknown texture channel class");
        return tcu::Interval();
    }
}
876
void ChannelAccess::setChannel(const tcu::IVec3 &pos, float x)
{
    // Encodes 'x' according to the channel class and stores the resulting bit
    // pattern with the unsigned-integer setChannel() overload.
    DE_ASSERT(pos[0] < m_size[0]);
    DE_ASSERT(pos[1] < m_size[1]);
    DE_ASSERT(pos[2] < m_size[2]);

    const uint32_t mask(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);

    switch (m_channelClass)
    {
    case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
    {
        // UNORM: round-to-even of x * (2^size - 1), clamped to the maximum code.
        const uint32_t maxValue(mask);
        const uint32_t value(de::min(maxValue, (uint32_t)convertSatRte<uint32_t>(x * (float)maxValue)));
        setChannel(pos, value);
        break;
    }

    case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
    {
        // SNORM: scale by 2^(size-1) - 1 and clamp symmetrically to
        // [-range, range]; the most negative code is never produced.
        const int32_t range((0x1u << (m_channelSize - 1u)) - 1u);
        const uint32_t value((uint32_t)de::clamp<int32_t>(convertSatRte<int32_t>(x * (float)range), -range, range));
        setChannel(pos, value);
        break;
    }

    case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
    {
        // UINT: cast (truncating) and clamp to the maximum representable value.
        const uint32_t maxValue(mask);
        const uint32_t value(de::min(maxValue, (uint32_t)x));
        setChannel(pos, value);
        break;
    }

    case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
    {
        // SINT: cast (truncating) and clamp to [-2^(size-1), 2^(size-1) - 1].
        const int32_t minValue(-(int32_t)(1u << (m_channelSize - 1u)));
        const int32_t maxValue((int32_t)((1u << (m_channelSize - 1u)) - 1u));
        const uint32_t value((uint32_t)de::clamp((int32_t)x, minValue, maxValue));
        setChannel(pos, value);
        break;
    }

    case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
    {
        // Only 32-bit IEEE floats are supported; store the raw bit pattern.
        if (m_channelSize == 32)
        {
            const uint32_t value = tcu::Float32(x).bits();
            setChannel(pos, value);
        }
        else
            DE_FATAL("Float type not supported");
        break;
    }

    default:
        DE_FATAL("Unknown texture channel class");
    }
}
936
getChannelAccess(MultiPlaneImageData & data,const vk::PlanarFormatDescription & formatInfo,const UVec2 & size,int channelNdx)937 ChannelAccess getChannelAccess(MultiPlaneImageData &data, const vk::PlanarFormatDescription &formatInfo,
938 const UVec2 &size, int channelNdx)
939 {
940 DE_ASSERT(formatInfo.hasChannelNdx(channelNdx));
941
942 const uint32_t planeNdx = formatInfo.channels[channelNdx].planeNdx;
943 const uint32_t valueOffsetBits = formatInfo.channels[channelNdx].offsetBits;
944 const uint32_t pixelStrideBytes = formatInfo.channels[channelNdx].strideBytes;
945 const uint32_t pixelStrideBits = pixelStrideBytes * 8;
946 const uint8_t sizeBits = formatInfo.channels[channelNdx].sizeBits;
947
948 DE_ASSERT(size.x() % (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor) == 0);
949 DE_ASSERT(size.y() % (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor) == 0);
950
951 uint32_t accessWidth = size.x() / (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor);
952 const uint32_t accessHeight = size.y() / (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor);
953 const uint32_t elementSizeBytes = formatInfo.planes[planeNdx].elementSizeBytes;
954 const uint32_t rowPitch = formatInfo.planes[planeNdx].elementSizeBytes * accessWidth;
955 const uint32_t rowPitchBits = rowPitch * 8;
956
957 if (pixelStrideBytes != elementSizeBytes)
958 {
959 DE_ASSERT(elementSizeBytes % pixelStrideBytes == 0);
960 accessWidth *= elementSizeBytes / pixelStrideBytes;
961 }
962
963 return ChannelAccess((tcu::TextureChannelClass)formatInfo.channels[channelNdx].type, sizeBits,
964 tcu::IVec3(accessWidth, accessHeight, 1u),
965 tcu::IVec3((int)pixelStrideBits, (int)rowPitchBits, 0), data.getPlanePtr(planeNdx),
966 (uint32_t)valueOffsetBits);
967 }
968
// Returns true if the format subsamples chroma horizontally
// (all listed formats are 422 or 420 variants).
bool isXChromaSubsampled(vk::VkFormat format)
{
    switch (format)
    {
    case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
    case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
    case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
        return true;

    default:
        return false;
    }
}
1003
// Returns true if the format subsamples chroma vertically
// (only 420 formats subsample in Y).
bool isYChromaSubsampled(vk::VkFormat format)
{
    switch (format)
    {
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
        return true;

    default:
        return false;
    }
}
1022
areLsb6BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)1023 bool areLsb6BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1024 {
1025 if ((srcFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) || (dstFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16) ||
1026 (srcFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
1027 (dstFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16) ||
1028 (srcFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
1029 (dstFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16) ||
1030 (srcFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
1031 (dstFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16) ||
1032 (srcFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
1033 (dstFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16) ||
1034 (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
1035 (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16) ||
1036 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
1037 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
1038 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
1039 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
1040 (srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
1041 (dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16) ||
1042 (srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16) ||
1043 (dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16))
1044 {
1045 return true;
1046 }
1047
1048 return false;
1049 }
1050
areLsb4BitsDontCare(vk::VkFormat srcFormat,vk::VkFormat dstFormat)1051 bool areLsb4BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1052 {
1053 if ((srcFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) || (dstFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16) ||
1054 (srcFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1055 (dstFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16) ||
1056 (srcFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1057 (dstFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16) ||
1058 (srcFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1059 (dstFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16) ||
1060 (srcFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1061 (dstFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16) ||
1062 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1063 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1064 (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1065 (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16) ||
1066 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1067 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1068 (srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1069 (dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16) ||
1070 (srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16) ||
1071 (dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16))
1072 {
1073 return true;
1074 }
1075
1076 return false;
1077 }
1078
1079 // \note Used for range expansion
// Returns the per-channel bit depth of the format; channels the format does
// not contain report 0. Non-YCbCr formats fall through to the generic tcu
// bit-depth query.
tcu::UVec4 getYCbCrBitDepth(vk::VkFormat format)
{
    switch (format)
    {
    case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
    case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
        return tcu::UVec4(8, 8, 8, 0);

    case vk::VK_FORMAT_R10X6_UNORM_PACK16:
        return tcu::UVec4(10, 0, 0, 0);

    case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
        return tcu::UVec4(10, 10, 0, 0);

    case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
        return tcu::UVec4(10, 10, 10, 10);

    case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
        return tcu::UVec4(10, 10, 10, 0);

    case vk::VK_FORMAT_R12X4_UNORM_PACK16:
        return tcu::UVec4(12, 0, 0, 0);

    case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
        return tcu::UVec4(12, 12, 0, 0);

    case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
    case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
        // NOTE(review): unlike the 8-, 10- and 16-bit three-channel cases
        // (which report alpha depth 0), the 12-bit group reports a 12-bit
        // alpha for all formats — confirm this asymmetry is intentional.
        return tcu::UVec4(12, 12, 12, 12);

    case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
    case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
        return tcu::UVec4(16, 16, 16, 0);

    default:
        return tcu::getTextureFormatBitDepth(vk::mapVkFormat(format)).cast<uint32_t>();
    }
}
1144
getPrecision(VkFormat format)1145 std::vector<tcu::FloatFormat> getPrecision(VkFormat format)
1146 {
1147 std::vector<FloatFormat> floatFormats;
1148 UVec4 channelDepth = getYCbCrBitDepth(format);
1149
1150 for (uint32_t channelIdx = 0; channelIdx < 4; channelIdx++)
1151 floatFormats.push_back(tcu::FloatFormat(0, 0, channelDepth[channelIdx], false, tcu::YES));
1152
1153 return floatFormats;
1154 }
1155
// Returns the number of channels the format describes (1, 2, 3 or 4).
uint32_t getYCbCrFormatChannelCount(vk::VkFormat format)
{
    switch (format)
    {
    case vk::VK_FORMAT_A1R5G5B5_UNORM_PACK16:
    case vk::VK_FORMAT_A2B10G10R10_UNORM_PACK32:
    case vk::VK_FORMAT_A2R10G10B10_UNORM_PACK32:
    case vk::VK_FORMAT_A8B8G8R8_UNORM_PACK32:
    case vk::VK_FORMAT_B4G4R4A4_UNORM_PACK16:
    case vk::VK_FORMAT_B5G5R5A1_UNORM_PACK16:
    case vk::VK_FORMAT_B8G8R8A8_UNORM:
    case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
    case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
    case vk::VK_FORMAT_R16G16B16A16_UNORM:
    case vk::VK_FORMAT_R4G4B4A4_UNORM_PACK16:
    case vk::VK_FORMAT_R5G5B5A1_UNORM_PACK16:
    case vk::VK_FORMAT_R8G8B8A8_UNORM:
        return 4;

    case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
    case vk::VK_FORMAT_B5G6R5_UNORM_PACK16:
    case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
    case vk::VK_FORMAT_B8G8R8_UNORM:
    case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
    case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
    case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
    case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
    case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
    case vk::VK_FORMAT_R16G16B16_UNORM:
    case vk::VK_FORMAT_R5G6B5_UNORM_PACK16:
    case vk::VK_FORMAT_R8G8B8_UNORM:
        return 3;

    case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
    case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
        return 2;

    case vk::VK_FORMAT_R10X6_UNORM_PACK16:
    case vk::VK_FORMAT_R12X4_UNORM_PACK16:
        return 1;

    default:
        DE_FATAL("Unknown number of channels");
        // NOTE(review): -1 wraps to 0xFFFFFFFF in the uint32_t return type —
        // confirm no caller relies on this sentinel.
        return -1;
    }
}
1227
1228 // YCbCr color conversion utilities
1229 namespace
1230 {
1231
// Expands a normalized chroma sample to its signed range according to the
// sampler's YCbCr range setting, with outward interval rounding at each step.
// NOTE(review): the narrow-range path shifts by (bits - 8), so it assumes
// bits >= 8 — confirm for all supported bit depths.
tcu::Interval rangeExpandChroma(vk::VkSamplerYcbcrRange range, const tcu::FloatFormat &conversionFormat,
                                const uint32_t bits, const tcu::Interval &sample)
{
    const uint32_t values(0x1u << bits);

    switch (range)
    {
    case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
        // Full range: sample - 2^(bits-1) / (2^bits - 1).
        return conversionFormat.roundOut(
            sample - conversionFormat.roundOut(
                         tcu::Interval((double)(0x1u << (bits - 1u)) / (double)((0x1u << bits) - 1u)), false),
            false);

    case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
    {
        // Narrow range: (sample * (2^bits - 1) - 128 * 2^(bits-8)) / (224 * 2^(bits-8)).
        const tcu::Interval a(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
        const tcu::Interval dividend(
            conversionFormat.roundOut(a - tcu::Interval((double)(128u * (0x1u << (bits - 8u)))), false));
        const tcu::Interval divisor((double)(224u * (0x1u << (bits - 8u))));
        const tcu::Interval result(conversionFormat.roundOut(dividend / divisor, false));

        return result;
    }

    default:
        DE_FATAL("Unknown YCbCrRange");
        return tcu::Interval();
    }
}
1261
// Expands a normalized luma sample according to the sampler's YCbCr range
// setting, with outward interval rounding at each step. NOTE(review): the
// narrow-range path shifts by (bits - 8), so it assumes bits >= 8.
tcu::Interval rangeExpandLuma(vk::VkSamplerYcbcrRange range, const tcu::FloatFormat &conversionFormat,
                              const uint32_t bits, const tcu::Interval &sample)
{
    const uint32_t values(0x1u << bits);

    switch (range)
    {
    case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
        // Full range: luma is already normalized; just round.
        return conversionFormat.roundOut(sample, false);

    case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
    {
        // Narrow range: (sample * (2^bits - 1) - 16 * 2^(bits-8)) / (219 * 2^(bits-8)).
        const tcu::Interval a(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
        const tcu::Interval dividend(
            conversionFormat.roundOut(a - tcu::Interval((double)(16u * (0x1u << (bits - 8u)))), false));
        const tcu::Interval divisor((double)(219u * (0x1u << (bits - 8u))));
        const tcu::Interval result(conversionFormat.roundOut(dividend / divisor, false));

        return result;
    }

    default:
        DE_FATAL("Unknown YCbCrRange");
        return tcu::Interval();
    }
}
1288
tcu::Interval clampMaybe(const tcu::Interval &x, double min, double max)
{
    DE_ASSERT(min <= max);

    tcu::Interval clamped = x;

    // Extend (union) the interval with the clamp bounds it crosses, so the
    // result contains both the clamped and the unclamped values.
    if (x.lo() < min)
        clamped = clamped | tcu::Interval(min);
    if (x.hi() > max)
        clamped = clamped | tcu::Interval(max);

    return clamped;
}
1303
// Converts a sampled texel to RGBA using interval arithmetic, following the
// Vulkan sampler YCbCr model conversion rules. Component order is
// input[0] = Cr (R), input[1] = Y (G), input[2] = Cb (B), input[3] = alpha;
// each conversionFormat entry supplies the rounding precision used for the
// corresponding output component.
void convertColor(vk::VkSamplerYcbcrModelConversion colorModel, vk::VkSamplerYcbcrRange range,
                  const vector<tcu::FloatFormat> &conversionFormat, const tcu::UVec4 &bitDepth,
                  const tcu::Interval input[4], tcu::Interval output[4])
{
    switch (colorModel)
    {
    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
    {
        // No conversion: pass all components through unchanged.
        for (size_t ndx = 0; ndx < 4; ndx++)
            output[ndx] = input[ndx];
        break;
    }

    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
    {
        // Range expansion only: chroma to [-0.5, 0.5], luma to [0.0, 1.0].
        output[0] = clampMaybe(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]), -0.5, 0.5);
        output[1] = clampMaybe(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]), 0.0, 1.0);
        output[2] = clampMaybe(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]), -0.5, 0.5);
        output[3] = input[3];
        break;
    }

    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
    case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
    {
        // Range-expand, then apply the model-specific YCbCr -> RGB matrix with
        // outward rounding after every arithmetic operation.
        const tcu::Interval y(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]));
        const tcu::Interval cr(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]));
        const tcu::Interval cb(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]));

        const tcu::Interval yClamped(clampMaybe(y, 0.0, 1.0));
        const tcu::Interval crClamped(clampMaybe(cr, -0.5, 0.5));
        const tcu::Interval cbClamped(clampMaybe(cb, -0.5, 0.5));

        if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601)
        {
            // BT.601: R = Y + 1.402 Cr, B = Y + 1.772 Cb.
            output[0] =
                conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.402 * crClamped, false), false);
            output[1] = conversionFormat[1].roundOut(
                conversionFormat[1].roundOut(
                    yClamped - conversionFormat[1].roundOut((0.202008 / 0.587) * cbClamped, false), false) -
                    conversionFormat[1].roundOut((0.419198 / 0.587) * crClamped, false),
                false);
            output[2] =
                conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.772 * cbClamped, false), false);
        }
        else if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709)
        {
            // BT.709: R = Y + 1.5748 Cr, B = Y + 1.8556 Cb.
            output[0] =
                conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.5748 * crClamped, false), false);
            output[1] = conversionFormat[1].roundOut(
                conversionFormat[1].roundOut(
                    yClamped - conversionFormat[1].roundOut((0.13397432 / 0.7152) * cbClamped, false), false) -
                    conversionFormat[1].roundOut((0.33480248 / 0.7152) * crClamped, false),
                false);
            output[2] =
                conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8556 * cbClamped, false), false);
        }
        else
        {
            // BT.2020: R = Y + 1.4746 Cr, B = Y + 1.8814 Cb.
            output[0] =
                conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.4746 * crClamped, false), false);
            output[1] = conversionFormat[1].roundOut(
                conversionFormat[1].roundOut(
                    yClamped - conversionFormat[1].roundOut(
                                   conversionFormat[1].roundOut(0.11156702 / 0.6780, false) * cbClamped, false),
                    false) -
                    conversionFormat[1].roundOut(conversionFormat[1].roundOut(0.38737742 / 0.6780, false) * crClamped,
                                                 false),
                false);
            output[2] =
                conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8814 * cbClamped, false), false);
        }
        output[3] = input[3];
        break;
    }

    default:
        DE_FATAL("Unknown YCbCrModel");
    }

    // Final RGB results are clamped to [0, 1]; YCbCr identity keeps its signed
    // chroma components as produced above.
    if (colorModel != vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
    {
        for (int ndx = 0; ndx < 3; ndx++)
            output[ndx] = clampMaybe(output[ndx], 0.0, 1.0);
    }
}
1391
int mirror(int coord)
{
    // Reflect negative coordinates across the origin: -1 -> 0, -2 -> 1, ...
    if (coord < 0)
        return -(coord + 1);
    return coord;
}
1396
int imod(int a, int b)
{
    // Mathematical modulo: for positive b the result is always in [0, b).
    const int r = a % b;
    return (r < 0) ? r + b : r;
}
1402
tcu::Interval frac(const tcu::Interval &x)
{
    // An interval spanning a full unit covers every fractional value.
    if (x.hi() - x.lo() >= 1.0)
        return tcu::Interval(0.0, 1.0);

    return tcu::Interval(deFrac(x.lo()), deFrac(x.hi()));
}
1414
tcu::Interval calculateUV(const tcu::FloatFormat &coordFormat, const tcu::Interval &st, const int size)
{
    // Scale a normalized texture coordinate to texel space, rounding outwards
    // at each step to keep the interval conservative.
    const tcu::Interval stRounded(coordFormat.roundOut(st, false));
    return coordFormat.roundOut(stRounded * tcu::Interval((double)size), false);
}
1419
// Computes the inclusive range of integer texel indices that NEAREST filtering
// may select for the given texel-space coordinate interval.
tcu::IVec2 calculateNearestIJRange(const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
{
    // i = round(u - 0.5); widen each bound by one ULP before rounding to cover
    // implementation rounding differences.
    const tcu::Interval ij(coordFormat.roundOut(coordFormat.roundOut(uv, false) - tcu::Interval(0.5), false));

    return tcu::IVec2(deRoundToInt32(ij.lo() - coordFormat.ulp(ij.lo(), 1)),
                      deRoundToInt32(ij.hi() + coordFormat.ulp(ij.hi(), 1)));
}
1427
1428 // Calculate range of pixel coordinates that can be used as lower coordinate for linear sampling
tcu::IVec2 calculateLinearIJRange(const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
{
    // Lower texel index is floor(u - 0.5); return the floor of both interval bounds.
    const tcu::Interval shifted(coordFormat.roundOut(uv - tcu::Interval(0.5), false));
    const int lowest  = deFloorToInt32(shifted.lo());
    const int highest = deFloorToInt32(shifted.hi());

    return tcu::IVec2(lowest, highest);
}
1435
calculateIJRange(vk::VkFilter filter,const tcu::FloatFormat & coordFormat,const tcu::Interval & uv)1436 tcu::IVec2 calculateIJRange(vk::VkFilter filter, const tcu::FloatFormat &coordFormat, const tcu::Interval &uv)
1437 {
1438 DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1439 return (filter == vk::VK_FILTER_LINEAR) ? calculateLinearIJRange(coordFormat, uv) :
1440 calculateNearestIJRange(coordFormat, uv);
1441 }
1442
// Bounds the fractional filtering weight that linear sampling may use for the
// lower texel 'ij', quantized outwards to the implementation's sub-texel
// precision grid and clamped to [0, 1].
tcu::Interval calculateAB(const uint32_t subTexelPrecisionBits, const tcu::Interval &uv, int ij)
{
    const uint32_t subdivisions = 0x1u << subTexelPrecisionBits;
    // Fractional part of (uv - 0.5), restricted to the texel cell [ij, ij+1].
    const tcu::Interval ab(frac((uv - 0.5) & tcu::Interval((double)ij, (double)(ij + 1))));
    // Snap the bounds outwards to the sub-texel grid.
    const tcu::Interval gridAB(ab * tcu::Interval(subdivisions));
    const tcu::Interval rounded(de::max(deFloor(gridAB.lo()) / subdivisions, 0.0),
                                de::min(deCeil(gridAB.hi()) / subdivisions, 1.0));

    return rounded;
}
1453
// Reads one channel value at 'coord', wrapping out-of-range coordinates
// according to the sampler address modes.
tcu::Interval lookupWrapped(const ChannelAccess &access, const tcu::FloatFormat &conversionFormat,
                            vk::VkSamplerAddressMode addressModeU, vk::VkSamplerAddressMode addressModeV,
                            const tcu::IVec2 &coord)
{
    tcu::Interval interval =
        access.getChannel(conversionFormat, tcu::IVec3(wrap(addressModeU, coord.x(), access.getSize().x()),
                                                       wrap(addressModeV, coord.y(), access.getSize().y()), 0));

    // Expand range for 10-bit conversions to +/-1.0 ULP
    if (conversionFormat.getFractionBits() == 10)
    {
        interval |= interval.lo() - interval.length() / 2.0;
        interval |= interval.hi() + interval.length() / 2.0;
    }

    return interval;
}
1471
tcu::Interval linearInterpolate(const tcu::FloatFormat &filteringFormat, const tcu::Interval &a, const tcu::Interval &b,
                                const tcu::Interval &p00, const tcu::Interval &p10, const tcu::Interval &p01,
                                const tcu::Interval &p11)
{
    // Bilinear blend of four corner samples; every operation is rounded
    // outwards in 'filteringFormat' so the result stays conservative.
    const tcu::Interval corners[4] = {p00, p10, p01, p11};
    tcu::Interval acc(0.0);

    for (size_t i = 0; i < 4; i++)
    {
        // Even indices take the (1 - weight) side in x; the first two take it in y.
        const tcu::Interval wx(filteringFormat.roundOut((i % 2) == 0 ? (1.0 - a) : a, false));
        const tcu::Interval wy(filteringFormat.roundOut((i / 2) == 0 ? (1.0 - b) : b, false));
        const tcu::Interval w(filteringFormat.roundOut(wx * wy, false));

        acc = filteringFormat.roundOut(acc + filteringFormat.roundOut(corners[i] * w, false), false);
    }

    return acc;
}
1490
tcu::Interval calculateImplicitChromaUV(const tcu::FloatFormat &coordFormat, vk::VkChromaLocation offset,
                                        const tcu::Interval &uv)
{
    // Map a full-resolution coordinate onto the half-resolution chroma grid,
    // shifting by half a texel first when chroma samples are cosited.
    if (offset != vk::VK_CHROMA_LOCATION_COSITED_EVEN)
        return coordFormat.roundOut(0.5 * uv, false);

    return coordFormat.roundOut(0.5 * coordFormat.roundOut(uv + 0.5, false), false);
}
1499
tcu::Interval linearSample(const ChannelAccess &access, const tcu::FloatFormat &conversionFormat,
                           const tcu::FloatFormat &filteringFormat, vk::VkSamplerAddressMode addressModeU,
                           vk::VkSamplerAddressMode addressModeV, const tcu::IVec2 &coord, const tcu::Interval &a,
                           const tcu::Interval &b)
{
    // Fetch the 2x2 texel footprint (with address-mode wrapping) and blend it
    // bilinearly with weights (a, b).
    const tcu::Interval p00(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 0)));
    const tcu::Interval p10(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 0)));
    const tcu::Interval p01(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 1)));
    const tcu::Interval p11(
        lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 1)));

    return linearInterpolate(filteringFormat, a, b, p00, p10, p01, p11);
}
1512
// Reconstructs the chroma value at full-resolution texel (i, j) for an
// X-subsampled (422-style) plane by blending the two nearest chroma samples.
tcu::Interval reconstructLinearXChromaSample(const tcu::FloatFormat &filteringFormat,
                                             const tcu::FloatFormat &conversionFormat, vk::VkChromaLocation offset,
                                             vk::VkSamplerAddressMode addressModeU,
                                             vk::VkSamplerAddressMode addressModeV, const ChannelAccess &access, int i,
                                             int j)
{
    // Chroma sample index and blend factor depend on whether chroma samples
    // are cosited with even luma samples or sit at the midpoint between them.
    const int subI = offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ? divFloor(i, 2) :
                                                                     (i % 2 == 0 ? divFloor(i, 2) - 1 : divFloor(i, 2));
    const double a =
        offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ? (i % 2 == 0 ? 0.0 : 0.5) : (i % 2 == 0 ? 0.25 : 0.75);

    // NOTE(review): 'a' weights the lower sample (subI) and (1 - a) the upper
    // one (subI + 1) — verify this orientation against the chroma
    // reconstruction rules in the Vulkan specification.
    const tcu::Interval A(filteringFormat.roundOut(
        a * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI, j)), false));
    const tcu::Interval B(filteringFormat.roundOut(
        (1.0 - a) * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI + 1, j)),
        false));
    return filteringFormat.roundOut(A + B, false);
}
1531
// Reconstructs the chroma value at full-resolution texel (i, j) for a plane
// subsampled in both X and Y (420-style) by bilinearly blending the four
// nearest chroma samples.
tcu::Interval reconstructLinearXYChromaSample(const tcu::FloatFormat &filteringFormat,
                                              const tcu::FloatFormat &conversionFormat, vk::VkChromaLocation xOffset,
                                              vk::VkChromaLocation yOffset, vk::VkSamplerAddressMode addressModeU,
                                              vk::VkSamplerAddressMode addressModeV, const ChannelAccess &access, int i,
                                              int j)
{
    // Lower chroma sample indices; for midpoint siting an even luma index
    // falls between the previous and current chroma samples.
    const int subI = xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ?
                         divFloor(i, 2) :
                         (i % 2 == 0 ? divFloor(i, 2) - 1 : divFloor(i, 2));
    const int subJ = yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ?
                         divFloor(j, 2) :
                         (j % 2 == 0 ? divFloor(j, 2) - 1 : divFloor(j, 2));

    // Per-axis blend weights chosen by the chroma sample location.
    const double a =
        xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ? (i % 2 == 0 ? 0.0 : 0.5) : (i % 2 == 0 ? 0.25 : 0.75);
    const double b =
        yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN ? (j % 2 == 0 ? 0.0 : 0.5) : (j % 2 == 0 ? 0.25 : 0.75);

    return linearSample(access, conversionFormat, filteringFormat, addressModeU, addressModeV, tcu::IVec2(subI, subJ),
                        a, b);
}
1553
swizzle(vk::VkComponentSwizzle swizzle,const ChannelAccess & identityPlane,const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane)1554 const ChannelAccess &swizzle(vk::VkComponentSwizzle swizzle, const ChannelAccess &identityPlane,
1555 const ChannelAccess &rPlane, const ChannelAccess &gPlane, const ChannelAccess &bPlane,
1556 const ChannelAccess &aPlane)
1557 {
1558 switch (swizzle)
1559 {
1560 case vk::VK_COMPONENT_SWIZZLE_IDENTITY:
1561 return identityPlane;
1562 case vk::VK_COMPONENT_SWIZZLE_R:
1563 return rPlane;
1564 case vk::VK_COMPONENT_SWIZZLE_G:
1565 return gPlane;
1566 case vk::VK_COMPONENT_SWIZZLE_B:
1567 return bPlane;
1568 case vk::VK_COMPONENT_SWIZZLE_A:
1569 return aPlane;
1570
1571 default:
1572 DE_FATAL("Unsupported swizzle");
1573 return identityPlane;
1574 }
1575 }
1576
1577 } // namespace
1578
wrap(vk::VkSamplerAddressMode addressMode,int coord,int size)1579 int wrap(vk::VkSamplerAddressMode addressMode, int coord, int size)
1580 {
1581 switch (addressMode)
1582 {
1583 case vk::VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
1584 return (size - 1) - mirror(imod(coord, 2 * size) - size);
1585
1586 case vk::VK_SAMPLER_ADDRESS_MODE_REPEAT:
1587 return imod(coord, size);
1588
1589 case vk::VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
1590 return de::clamp(coord, 0, size - 1);
1591
1592 case vk::VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
1593 return de::clamp(mirror(coord), 0, size - 1);
1594
1595 default:
1596 DE_FATAL("Unknown wrap mode");
1597 return ~0;
1598 }
1599 }
1600
// Integer division rounding towards negative infinity (mathematical floor),
// as opposed to C++ operator/ which truncates towards zero.
//
// Generalized to any sign combination of the operands (b must be non-zero):
// e.g. divFloor(-5, 2) == -3 and divFloor(5, -2) == -3. For b > 0 (the only
// use in this file) the result is identical to the previous implementation.
int divFloor(int a, int b)
{
    const int quotient  = a / b; // truncated towards zero
    const int remainder = a % b;

    // Truncation and floor differ exactly when the division is inexact and
    // the remainder's sign disagrees with the divisor's.
    if (remainder != 0 && ((remainder < 0) != (b < 0)))
        return quotient - 1;

    return quotient;
}
1610
calculateBounds(const ChannelAccess & rPlane,const ChannelAccess & gPlane,const ChannelAccess & bPlane,const ChannelAccess & aPlane,const UVec4 & bitDepth,const vector<Vec2> & sts,const vector<FloatFormat> & filteringFormat,const vector<FloatFormat> & conversionFormat,const uint32_t subTexelPrecisionBits,vk::VkFilter filter,vk::VkSamplerYcbcrModelConversion colorModel,vk::VkSamplerYcbcrRange range,vk::VkFilter chromaFilter,vk::VkChromaLocation xChromaOffset,vk::VkChromaLocation yChromaOffset,const vk::VkComponentMapping & componentMapping,bool explicitReconstruction,vk::VkSamplerAddressMode addressModeU,vk::VkSamplerAddressMode addressModeV,std::vector<Vec4> & minBounds,std::vector<Vec4> & maxBounds,std::vector<Vec4> & uvBounds,std::vector<IVec4> & ijBounds)1611 void calculateBounds(const ChannelAccess &rPlane, const ChannelAccess &gPlane, const ChannelAccess &bPlane,
1612 const ChannelAccess &aPlane, const UVec4 &bitDepth, const vector<Vec2> &sts,
1613 const vector<FloatFormat> &filteringFormat, const vector<FloatFormat> &conversionFormat,
1614 const uint32_t subTexelPrecisionBits, vk::VkFilter filter,
1615 vk::VkSamplerYcbcrModelConversion colorModel, vk::VkSamplerYcbcrRange range,
1616 vk::VkFilter chromaFilter, vk::VkChromaLocation xChromaOffset, vk::VkChromaLocation yChromaOffset,
1617 const vk::VkComponentMapping &componentMapping, bool explicitReconstruction,
1618 vk::VkSamplerAddressMode addressModeU, vk::VkSamplerAddressMode addressModeV,
1619 std::vector<Vec4> &minBounds, std::vector<Vec4> &maxBounds, std::vector<Vec4> &uvBounds,
1620 std::vector<IVec4> &ijBounds)
1621 {
1622 const FloatFormat highp(-126, 127, 23, true,
1623 tcu::MAYBE, // subnormals
1624 tcu::YES, // infinities
1625 tcu::MAYBE); // NaN
1626 const FloatFormat coordFormat(-32, 32, 16, true);
1627 const ChannelAccess &rAccess(swizzle(componentMapping.r, rPlane, rPlane, gPlane, bPlane, aPlane));
1628 const ChannelAccess &gAccess(swizzle(componentMapping.g, gPlane, rPlane, gPlane, bPlane, aPlane));
1629 const ChannelAccess &bAccess(swizzle(componentMapping.b, bPlane, rPlane, gPlane, bPlane, aPlane));
1630 const ChannelAccess &aAccess(swizzle(componentMapping.a, aPlane, rPlane, gPlane, bPlane, aPlane));
1631
1632 const bool subsampledX = gAccess.getSize().x() > rAccess.getSize().x();
1633 const bool subsampledY = gAccess.getSize().y() > rAccess.getSize().y();
1634
1635 minBounds.resize(sts.size(), Vec4(TCU_INFINITY));
1636 maxBounds.resize(sts.size(), Vec4(-TCU_INFINITY));
1637
1638 uvBounds.resize(sts.size(), Vec4(TCU_INFINITY, -TCU_INFINITY, TCU_INFINITY, -TCU_INFINITY));
1639 ijBounds.resize(sts.size(), IVec4(0x7FFFFFFF, -1 - 0x7FFFFFFF, 0x7FFFFFFF, -1 - 0x7FFFFFFF));
1640
1641 // Chroma plane sizes must match
1642 DE_ASSERT(rAccess.getSize() == bAccess.getSize());
1643
1644 // Luma plane sizes must match
1645 DE_ASSERT(gAccess.getSize() == aAccess.getSize());
1646
1647 // Luma plane size must match chroma plane or be twice as big
1648 DE_ASSERT(rAccess.getSize().x() == gAccess.getSize().x() || 2 * rAccess.getSize().x() == gAccess.getSize().x());
1649 DE_ASSERT(rAccess.getSize().y() == gAccess.getSize().y() || 2 * rAccess.getSize().y() == gAccess.getSize().y());
1650
1651 DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1652 DE_ASSERT(chromaFilter == vk::VK_FILTER_NEAREST || chromaFilter == vk::VK_FILTER_LINEAR);
1653 DE_ASSERT(subsampledX || !subsampledY);
1654
1655 for (size_t ndx = 0; ndx < sts.size(); ndx++)
1656 {
1657 const Vec2 st(sts[ndx]);
1658 Interval bounds[4];
1659
1660 const Interval u(calculateUV(coordFormat, st[0], gAccess.getSize().x()));
1661 const Interval v(calculateUV(coordFormat, st[1], gAccess.getSize().y()));
1662
1663 uvBounds[ndx][0] = (float)u.lo();
1664 uvBounds[ndx][1] = (float)u.hi();
1665
1666 uvBounds[ndx][2] = (float)v.lo();
1667 uvBounds[ndx][3] = (float)v.hi();
1668
1669 const IVec2 iRange(calculateIJRange(filter, coordFormat, u));
1670 const IVec2 jRange(calculateIJRange(filter, coordFormat, v));
1671
1672 ijBounds[ndx][0] = iRange[0];
1673 ijBounds[ndx][1] = iRange[1];
1674
1675 ijBounds[ndx][2] = jRange[0];
1676 ijBounds[ndx][3] = jRange[1];
1677
1678 for (int j = jRange.x(); j <= jRange.y(); j++)
1679 for (int i = iRange.x(); i <= iRange.y(); i++)
1680 {
1681 if (filter == vk::VK_FILTER_NEAREST)
1682 {
1683 const Interval gValue(
1684 lookupWrapped(gAccess, conversionFormat[1], addressModeU, addressModeV, IVec2(i, j)));
1685 const Interval aValue(
1686 lookupWrapped(aAccess, conversionFormat[3], addressModeU, addressModeV, IVec2(i, j)));
1687
1688 if (explicitReconstruction || !(subsampledX || subsampledY))
1689 {
1690 Interval rValue, bValue;
1691 if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1692 {
1693 // Reconstruct using nearest if needed, otherwise, just take what's already there.
1694 const int subI = subsampledX ? i / 2 : i;
1695 const int subJ = subsampledY ? j / 2 : j;
1696 rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1697 IVec2(subI, subJ));
1698 bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1699 IVec2(subI, subJ));
1700 }
1701 else // vk::VK_FILTER_LINEAR
1702 {
1703 if (subsampledY)
1704 {
1705 rValue = reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1706 xChromaOffset, yChromaOffset, addressModeU,
1707 addressModeV, rAccess, i, j);
1708 bValue = reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1709 xChromaOffset, yChromaOffset, addressModeU,
1710 addressModeV, bAccess, i, j);
1711 }
1712 else
1713 {
1714 rValue = reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1715 xChromaOffset, addressModeU, addressModeV,
1716 rAccess, i, j);
1717 bValue = reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1718 xChromaOffset, addressModeU, addressModeV,
1719 bAccess, i, j);
1720 }
1721 }
1722
1723 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1724 Interval dstColor[4];
1725
1726 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1727
1728 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1729 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1730 }
1731 else
1732 {
1733 const Interval chromaU(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) :
1734 u);
1735 const Interval chromaV(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) :
1736 v);
1737
1738 // Reconstructed chroma samples with implicit filtering
1739 const IVec2 chromaIRange(subsampledX ? calculateIJRange(chromaFilter, coordFormat, chromaU) :
1740 IVec2(i, i));
1741 const IVec2 chromaJRange(subsampledY ? calculateIJRange(chromaFilter, coordFormat, chromaV) :
1742 IVec2(j, j));
1743
1744 for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1745 for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1746 {
1747 Interval rValue, bValue;
1748
1749 if (chromaFilter == vk::VK_FILTER_NEAREST)
1750 {
1751 rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1752 IVec2(chromaI, chromaJ));
1753 bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1754 IVec2(chromaI, chromaJ));
1755 }
1756 else // vk::VK_FILTER_LINEAR
1757 {
1758 const Interval chromaA(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1759 const Interval chromaB(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1760
1761 rValue =
1762 linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU,
1763 addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1764 bValue =
1765 linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU,
1766 addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1767 }
1768
1769 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1770
1771 Interval dstColor[4];
1772 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1773
1774 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1775 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1776 }
1777 }
1778 }
1779 else // filter == vk::VK_FILTER_LINEAR
1780 {
1781 const Interval lumaA(calculateAB(subTexelPrecisionBits, u, i));
1782 const Interval lumaB(calculateAB(subTexelPrecisionBits, v, j));
1783
1784 const Interval gValue(linearSample(gAccess, conversionFormat[1], filteringFormat[1], addressModeU,
1785 addressModeV, IVec2(i, j), lumaA, lumaB));
1786 const Interval aValue(linearSample(aAccess, conversionFormat[3], filteringFormat[3], addressModeU,
1787 addressModeV, IVec2(i, j), lumaA, lumaB));
1788
1789 if (explicitReconstruction || !(subsampledX || subsampledY))
1790 {
1791 Interval rValue, bValue;
1792 if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1793 {
1794 rValue = linearInterpolate(
1795 filteringFormat[0], lumaA, lumaB,
1796 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1797 IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1798 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1799 IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1800 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1801 IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1802 lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV,
1803 IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1804 bValue = linearInterpolate(
1805 filteringFormat[2], lumaA, lumaB,
1806 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1807 IVec2(i / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1808 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1809 IVec2((i + 1) / (subsampledX ? 2 : 1), j / (subsampledY ? 2 : 1))),
1810 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1811 IVec2(i / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1812 lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV,
1813 IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1814 }
1815 else // vk::VK_FILTER_LINEAR
1816 {
1817 if (subsampledY)
1818 {
1819 // Linear, Reconstructed xx chroma samples with explicit linear filtering
1820 rValue = linearInterpolate(
1821 filteringFormat[0], lumaA, lumaB,
1822 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1823 xChromaOffset, yChromaOffset, addressModeU,
1824 addressModeV, rAccess, i, j),
1825 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1826 xChromaOffset, yChromaOffset, addressModeU,
1827 addressModeV, rAccess, i + 1, j),
1828 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1829 xChromaOffset, yChromaOffset, addressModeU,
1830 addressModeV, rAccess, i, j + 1),
1831 reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0],
1832 xChromaOffset, yChromaOffset, addressModeU,
1833 addressModeV, rAccess, i + 1, j + 1));
1834 bValue = linearInterpolate(
1835 filteringFormat[2], lumaA, lumaB,
1836 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1837 xChromaOffset, yChromaOffset, addressModeU,
1838 addressModeV, bAccess, i, j),
1839 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1840 xChromaOffset, yChromaOffset, addressModeU,
1841 addressModeV, bAccess, i + 1, j),
1842 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1843 xChromaOffset, yChromaOffset, addressModeU,
1844 addressModeV, bAccess, i, j + 1),
1845 reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2],
1846 xChromaOffset, yChromaOffset, addressModeU,
1847 addressModeV, bAccess, i + 1, j + 1));
1848 }
1849 else
1850 {
1851 // Linear, Reconstructed x chroma samples with explicit linear filtering
1852 rValue = linearInterpolate(
1853 filteringFormat[0], lumaA, lumaB,
1854 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1855 xChromaOffset, addressModeU, addressModeV, rAccess,
1856 i, j),
1857 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1858 xChromaOffset, addressModeU, addressModeV, rAccess,
1859 i + 1, j),
1860 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1861 xChromaOffset, addressModeU, addressModeV, rAccess,
1862 i, j + 1),
1863 reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0],
1864 xChromaOffset, addressModeU, addressModeV, rAccess,
1865 i + 1, j + 1));
1866 bValue = linearInterpolate(
1867 filteringFormat[2], lumaA, lumaB,
1868 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1869 xChromaOffset, addressModeU, addressModeV, bAccess,
1870 i, j),
1871 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1872 xChromaOffset, addressModeU, addressModeV, bAccess,
1873 i + 1, j),
1874 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1875 xChromaOffset, addressModeU, addressModeV, bAccess,
1876 i, j + 1),
1877 reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2],
1878 xChromaOffset, addressModeU, addressModeV, bAccess,
1879 i + 1, j + 1));
1880 }
1881 }
1882
1883 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1884 Interval dstColor[4];
1885
1886 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1887
1888 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1889 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1890 }
1891 else
1892 {
1893 const Interval chromaU(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) :
1894 u);
1895 const Interval chromaV(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) :
1896 v);
1897
1898 // TODO: It looks incorrect to ignore the chroma filter here. Is it?
1899 const IVec2 chromaIRange(calculateNearestIJRange(coordFormat, chromaU));
1900 const IVec2 chromaJRange(calculateNearestIJRange(coordFormat, chromaV));
1901
1902 for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1903 for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1904 {
1905 Interval rValue, bValue;
1906
1907 if (chromaFilter == vk::VK_FILTER_NEAREST)
1908 {
1909 rValue = lookupWrapped(rAccess, conversionFormat[1], addressModeU, addressModeV,
1910 IVec2(chromaI, chromaJ));
1911 bValue = lookupWrapped(bAccess, conversionFormat[3], addressModeU, addressModeV,
1912 IVec2(chromaI, chromaJ));
1913 }
1914 else // vk::VK_FILTER_LINEAR
1915 {
1916 const Interval chromaA(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1917 const Interval chromaB(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1918
1919 rValue =
1920 linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU,
1921 addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1922 bValue =
1923 linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU,
1924 addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1925 }
1926
1927 const Interval srcColor[] = {rValue, gValue, bValue, aValue};
1928 Interval dstColor[4];
1929 convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1930
1931 for (size_t compNdx = 0; compNdx < 4; compNdx++)
1932 bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1933 }
1934 }
1935 }
1936 }
1937
1938 minBounds[ndx] =
1939 Vec4((float)bounds[0].lo(), (float)bounds[1].lo(), (float)bounds[2].lo(), (float)bounds[3].lo());
1940 maxBounds[ndx] =
1941 Vec4((float)bounds[0].hi(), (float)bounds[1].hi(), (float)bounds[2].hi(), (float)bounds[3].hi());
1942 }
1943 }
1944
1945 } // namespace ycbcr
1946
1947 } // namespace vkt
1948