1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 Google Inc.
6  * Copyright (c) 2019 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief YCbCr Test Utilities
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktYCbCrUtil.hpp"
26 
27 #include "vkQueryUtil.hpp"
28 #include "vkRefUtil.hpp"
29 #include "vkTypeUtil.hpp"
30 #include "vkCmdUtil.hpp"
31 
32 #include "tcuTextureUtil.hpp"
33 #include "deMath.h"
34 #include "deFloat16.h"
35 #include "tcuVector.hpp"
36 #include "tcuVectorUtil.hpp"
37 
38 #include "deSTLUtil.hpp"
39 #include "deUniquePtr.hpp"
40 
41 #include <limits>
42 
43 namespace vkt
44 {
45 namespace ycbcr
46 {
47 
48 using namespace vk;
49 
50 using de::MovePtr;
51 using tcu::FloatFormat;
52 using tcu::Interval;
53 using tcu::IVec2;
54 using tcu::IVec4;
55 using tcu::UVec2;
56 using tcu::UVec4;
57 using tcu::Vec2;
58 using tcu::Vec4;
59 using std::vector;
60 using std::string;
61 
62 // MultiPlaneImageData
63 
64 MultiPlaneImageData::MultiPlaneImageData (VkFormat format, const UVec2& size)
65 	: m_format		(format)
66 	, m_description	(getPlanarFormatDescription(format))
67 	, m_size		(size)
68 {
69 	for (deUint32 planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
70 		m_planeData[planeNdx].resize(getPlaneSizeInBytes(m_description, size, planeNdx, 0, BUFFER_IMAGE_COPY_OFFSET_GRANULARITY));
71 }
72 
73 MultiPlaneImageData::MultiPlaneImageData (const MultiPlaneImageData& other)
74 	: m_format		(other.m_format)
75 	, m_description	(other.m_description)
76 	, m_size		(other.m_size)
77 {
78 	for (deUint32 planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
79 		m_planeData[planeNdx] = other.m_planeData[planeNdx];
80 }
81 
82 MultiPlaneImageData::~MultiPlaneImageData (void)
83 {
84 }
85 
86 tcu::PixelBufferAccess MultiPlaneImageData::getChannelAccess (deUint32 channelNdx)
87 {
88 	void*		planePtrs[PlanarFormatDescription::MAX_PLANES];
89 	deUint32	planeRowPitches[PlanarFormatDescription::MAX_PLANES];
90 
91 	for (deUint32 planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
92 	{
93 		const deUint32	planeW		= m_size.x() / ( m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
94 		planeRowPitches[planeNdx]	= m_description.planes[planeNdx].elementSizeBytes * planeW;
95 		planePtrs[planeNdx]			= &m_planeData[planeNdx][0];
96 	}
97 
98 	return vk::getChannelAccess(m_description,
99 								m_size,
100 								planeRowPitches,
101 								planePtrs,
102 								channelNdx);
103 }
104 
105 tcu::ConstPixelBufferAccess MultiPlaneImageData::getChannelAccess (deUint32 channelNdx) const
106 {
107 	const void*	planePtrs[PlanarFormatDescription::MAX_PLANES];
108 	deUint32	planeRowPitches[PlanarFormatDescription::MAX_PLANES];
109 
110 	for (deUint32 planeNdx = 0; planeNdx < m_description.numPlanes; ++planeNdx)
111 	{
112 		const deUint32	planeW		= m_size.x() / (m_description.blockWidth * m_description.planes[planeNdx].widthDivisor);
113 		planeRowPitches[planeNdx]	= m_description.planes[planeNdx].elementSizeBytes * planeW;
114 		planePtrs[planeNdx]			= &m_planeData[planeNdx][0];
115 	}
116 
117 	return vk::getChannelAccess(m_description,
118 								m_size,
119 								planeRowPitches,
120 								planePtrs,
121 								channelNdx);
122 }
123 
124 // Misc utilities
125 
126 namespace
127 {
128 
129 void allocateStagingBuffers (const DeviceInterface&			vkd,
130 							 VkDevice						device,
131 							 Allocator&						allocator,
132 							 const MultiPlaneImageData&		imageData,
133 							 vector<VkBufferSp>*			buffers,
134 							 vector<AllocationSp>*			allocations)
135 {
136 	for (deUint32 planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
137 	{
138 		const VkBufferCreateInfo	bufferInfo	=
139 		{
140 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
141 			DE_NULL,
142 			(VkBufferCreateFlags)0u,
143 			(VkDeviceSize)imageData.getPlaneSize(planeNdx),
144 			VK_BUFFER_USAGE_TRANSFER_SRC_BIT|VK_BUFFER_USAGE_TRANSFER_DST_BIT,
145 			VK_SHARING_MODE_EXCLUSIVE,
146 			0u,
147 			(const deUint32*)DE_NULL,
148 		};
149 		Move<VkBuffer>				buffer		(createBuffer(vkd, device, &bufferInfo));
150 		MovePtr<Allocation>			allocation	(allocator.allocate(getBufferMemoryRequirements(vkd, device, *buffer),
151 																	MemoryRequirement::HostVisible|MemoryRequirement::Any));
152 
153 		VK_CHECK(vkd.bindBufferMemory(device, *buffer, allocation->getMemory(), allocation->getOffset()));
154 
155 		buffers->push_back(VkBufferSp(new Unique<VkBuffer>(buffer)));
156 		allocations->push_back(AllocationSp(allocation.release()));
157 	}
158 }
159 
160 void allocateAndWriteStagingBuffers (const DeviceInterface&		vkd,
161 									  VkDevice						device,
162 									  Allocator&					allocator,
163 									  const MultiPlaneImageData&	imageData,
164 									  vector<VkBufferSp>*			buffers,
165 									  vector<AllocationSp>*			allocations)
166 {
167 	allocateStagingBuffers(vkd, device, allocator, imageData, buffers, allocations);
168 
169 	for (deUint32 planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
170 	{
171 		deMemcpy((*allocations)[planeNdx]->getHostPtr(), imageData.getPlanePtr(planeNdx), imageData.getPlaneSize(planeNdx));
172 		flushMappedMemoryRange(vkd, device, (*allocations)[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
173 	}
174 }
175 
176 void readStagingBuffers (MultiPlaneImageData*			imageData,
177 						 const DeviceInterface&			vkd,
178 						 VkDevice						device,
179 						 const vector<AllocationSp>&	allocations)
180 {
181 	for (deUint32 planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
182 	{
183 		invalidateMappedMemoryRange(vkd, device, allocations[planeNdx]->getMemory(), 0u, VK_WHOLE_SIZE);
184 		deMemcpy(imageData->getPlanePtr(planeNdx), allocations[planeNdx]->getHostPtr(), imageData->getPlaneSize(planeNdx));
185 	}
186 }
187 
188 } // anonymous
189 
190 void checkImageSupport (Context& context, VkFormat format, VkImageCreateFlags createFlags, VkImageTiling tiling)
191 {
192 	const bool													disjoint	= (createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0;
193 	const VkPhysicalDeviceSamplerYcbcrConversionFeatures		features	= context.getSamplerYcbcrConversionFeatures();
194 	vector<string>												reqExts;
195 
196 	if (!isCoreDeviceExtension(context.getUsedApiVersion(), "VK_KHR_sampler_ycbcr_conversion"))
197 		reqExts.push_back("VK_KHR_sampler_ycbcr_conversion");
198 
199 	if (disjoint)
200 	{
201 		if (!isCoreDeviceExtension(context.getUsedApiVersion(), "VK_KHR_bind_memory2"))
202 			reqExts.push_back("VK_KHR_bind_memory2");
203 		if (!isCoreDeviceExtension(context.getUsedApiVersion(), "VK_KHR_get_memory_requirements2"))
204 			reqExts.push_back("VK_KHR_get_memory_requirements2");
205 	}
206 
207 	for (const string& ext : reqExts)
208 		context.requireDeviceFunctionality(ext);
209 
210 	if (features.samplerYcbcrConversion == VK_FALSE)
211 	{
212 		TCU_THROW(NotSupportedError, "samplerYcbcrConversion is not supported");
213 	}
214 
215 	{
216 		const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(context.getInstanceInterface(),
217 																							context.getPhysicalDevice(),
218 																							format);
219 		const VkFormatFeatureFlags	featureFlags		= tiling == VK_IMAGE_TILING_OPTIMAL
220 														? formatProperties.optimalTilingFeatures
221 														: formatProperties.linearTilingFeatures;
222 
223 		if ((featureFlags & (VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT | VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT)) == 0)
224 			TCU_THROW(NotSupportedError, "YCbCr conversion is not supported for format");
225 
226 		if (disjoint && ((featureFlags & VK_FORMAT_FEATURE_DISJOINT_BIT) == 0))
227 			TCU_THROW(NotSupportedError, "Disjoint planes are not supported for format");
228 	}
229 }
230 
231 // When noNan is true, fillRandom does not generate NaNs in float formats.
232 // But as a side effect, it also takes out infinities as well as almost half of the largest-magnitude values.
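// (In IEEE-754 binary floats an all-ones exponent field encodes NaN and Inf, so the masks below
// are intended to force an exponent bit to zero in each element; losing that top exponent value
// is also why the infinities and roughly half of the largest finite magnitudes disappear.)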
233 void fillRandom (de::Random* randomGen, MultiPlaneImageData* imageData, const vk::VkFormat format, const bool noNan)
234 {
235 	// \todo [pyry] Optimize, take into account bits that must be 0
236 
237 	deUint8 mask, maskStride;
238 	const deUint8 noMask = 0xffu;
239 
240 	switch (format)
241 	{
242 		case vk::VK_FORMAT_B10G11R11_UFLOAT_PACK32:
243 			mask		= 0xbb;
244 			maskStride	= 1;
245 			break;
246 		case vk::VK_FORMAT_R16_SFLOAT:
247 		case vk::VK_FORMAT_R16G16_SFLOAT:
248 		case vk::VK_FORMAT_R16G16B16_SFLOAT:
249 		case vk::VK_FORMAT_R16G16B16A16_SFLOAT:
250 			mask		= 0xbf;
251 			maskStride	= 2;
252 			break;
253 		case vk::VK_FORMAT_R32_SFLOAT:
254 		case vk::VK_FORMAT_R32G32_SFLOAT:
255 		case vk::VK_FORMAT_R32G32B32_SFLOAT:
256 		case vk::VK_FORMAT_R32G32B32A32_SFLOAT:
257 			mask		= 0xbf;
258 			maskStride	= 4;
259 			break;
260 		case vk::VK_FORMAT_R64_SFLOAT:
261 		case vk::VK_FORMAT_R64G64_SFLOAT:
262 		case vk::VK_FORMAT_R64G64B64_SFLOAT:
263 		case vk::VK_FORMAT_R64G64B64A64_SFLOAT:
264 			mask		= 0xbf;
265 			maskStride	= 8;
266 			break;
267 		default:
268 			mask		= 0xff;
269 			maskStride	= 1;
270 			break;
271 	}
272 
273 	for (deUint32 planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
274 	{
275 		const size_t	planeSize	= imageData->getPlaneSize(planeNdx);
276 		deUint8* const	planePtr	= (deUint8*)imageData->getPlanePtr(planeNdx);
277 
278 		for (size_t ndx = 0; ndx < planeSize; ++ndx)
279 		{
280 			const deUint8 finalMask = (noNan && ((ndx % static_cast<size_t>(maskStride)) == 0u)) ? mask : noMask;
281 
282 			planePtr[ndx] = randomGen->getUint8() & finalMask;
283 		}
284 	}
285 }
286 
287 void fillGradient (MultiPlaneImageData* imageData, const tcu::Vec4& minVal, const tcu::Vec4& maxVal)
288 {
289 	const PlanarFormatDescription&	formatInfo	= imageData->getDescription();
290 
291 	// \todo [pyry] Optimize: no point in re-rendering source gradient for each channel.
292 
293 	for (deUint32 channelNdx = 0; channelNdx < 4; channelNdx++)
294 	{
295 		if (formatInfo.hasChannelNdx(channelNdx))
296 		{
297 			const tcu::PixelBufferAccess		channelAccess	= imageData->getChannelAccess(channelNdx);
298 			tcu::TextureLevel					tmpTexture		(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT),  channelAccess.getWidth(), channelAccess.getHeight());
299 			const tcu::ConstPixelBufferAccess	tmpAccess		= tmpTexture.getAccess();
300 
301 			tcu::fillWithComponentGradients(tmpTexture, minVal, maxVal);
302 
303 			for (int y = 0; y < channelAccess.getHeight(); ++y)
304 			for (int x = 0; x < channelAccess.getWidth(); ++x)
305 			{
306 				channelAccess.setPixel(tcu::Vec4(tmpAccess.getPixel(x, y)[channelNdx]), x, y);
307 			}
308 		}
309 	}
310 }
311 
312 void fillZero (MultiPlaneImageData* imageData)
313 {
314 	for (deUint32 planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
315 		deMemset(imageData->getPlanePtr(planeNdx), 0, imageData->getPlaneSize(planeNdx));
316 }
317 
318 vector<AllocationSp> allocateAndBindImageMemory (const DeviceInterface&	vkd,
319 												 VkDevice				device,
320 												 Allocator&				allocator,
321 												 VkImage				image,
322 												 VkFormat				format,
323 												 VkImageCreateFlags		createFlags,
324 												 vk::MemoryRequirement	requirement)
325 {
326 	vector<AllocationSp> allocations;
327 
328 	if ((createFlags & VK_IMAGE_CREATE_DISJOINT_BIT) != 0)
329 	{
330 		const deUint32	numPlanes	= getPlaneCount(format);
331 
332 		bindImagePlanesMemory(vkd, device, image, numPlanes, allocations, allocator, requirement);
333 	}
334 	else
335 	{
336 		const VkMemoryRequirements	reqs	= getImageMemoryRequirements(vkd, device, image);
337 
338 		allocations.push_back(AllocationSp(allocator.allocate(reqs, requirement).release()));
339 
340 		VK_CHECK(vkd.bindImageMemory(device, image, allocations.back()->getMemory(), allocations.back()->getOffset()));
341 	}
342 
343 	return allocations;
344 }
345 
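// Uploads imageData plane by plane through host-visible staging buffers and
// vkCmdCopyBufferToImage, leaving the image in finalLayout with nextAccess made visible.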
346 void uploadImage (const DeviceInterface&		vkd,
347 				  VkDevice						device,
348 				  deUint32						queueFamilyNdx,
349 				  Allocator&					allocator,
350 				  VkImage						image,
351 				  const MultiPlaneImageData&	imageData,
352 				  VkAccessFlags					nextAccess,
353 				  VkImageLayout					finalLayout,
354 				  deUint32						arrayLayer)
355 {
356 	const VkQueue					queue			= getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
357 	const Unique<VkCommandPool>		cmdPool			(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
358 	const Unique<VkCommandBuffer>	cmdBuffer		(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
359 	vector<VkBufferSp>				stagingBuffers;
360 	vector<AllocationSp>			stagingMemory;
361 
362 	const PlanarFormatDescription&	formatDesc		= imageData.getDescription();
363 
364 	allocateAndWriteStagingBuffers(vkd, device, allocator, imageData, &stagingBuffers, &stagingMemory);
365 
366 	beginCommandBuffer(vkd, *cmdBuffer);
367 
368 	for (deUint32 planeNdx = 0; planeNdx < imageData.getDescription().numPlanes; ++planeNdx)
369 	{
370 		const VkImageAspectFlagBits	aspect	= (formatDesc.numPlanes > 1)
371 											? getPlaneAspect(planeNdx)
372 											: VK_IMAGE_ASPECT_COLOR_BIT;
373 		const VkExtent3D imageExtent		= makeExtent3D(imageData.getSize().x(), imageData.getSize().y(), 1u);
374 		const VkExtent3D planeExtent		= getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
375 		const VkBufferImageCopy		copy	=
376 		{
377 			0u,		// bufferOffset
378 			0u,		// bufferRowLength
379 			0u,		// bufferImageHeight
380 			{ (VkImageAspectFlags)aspect, 0u, arrayLayer, 1u },
381 			makeOffset3D(0u, 0u, 0u),
382 			planeExtent
383 		};
384 
385 		{
386 			const VkImageMemoryBarrier		preCopyBarrier	=
387 				{
388 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
389 					DE_NULL,
390 					(VkAccessFlags)0,
391 					VK_ACCESS_TRANSFER_WRITE_BIT,
392 					VK_IMAGE_LAYOUT_UNDEFINED,
393 					VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
394 					VK_QUEUE_FAMILY_IGNORED,
395 					VK_QUEUE_FAMILY_IGNORED,
396 					image,
397 					{ (VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u }
398 				};
399 
400 			vkd.cmdPipelineBarrier(*cmdBuffer,
401 								   (VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
402 								   (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
403 								   (VkDependencyFlags)0u,
404 								   0u,
405 								   (const VkMemoryBarrier*)DE_NULL,
406 								   0u,
407 								   (const VkBufferMemoryBarrier*)DE_NULL,
408 								   1u,
409 								   &preCopyBarrier);
410 		}
411 
412 		vkd.cmdCopyBufferToImage(*cmdBuffer, **stagingBuffers[planeNdx], image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &copy);
413 
414 		{
415 			const VkImageMemoryBarrier		postCopyBarrier	=
416 				{
417 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
418 					DE_NULL,
419 					VK_ACCESS_TRANSFER_WRITE_BIT,
420 					nextAccess,
421 					VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
422 					finalLayout,
423 					VK_QUEUE_FAMILY_IGNORED,
424 					VK_QUEUE_FAMILY_IGNORED,
425 					image,
426 					{ (VkImageAspectFlags)aspect, 0u, 1u, arrayLayer, 1u }
427 				};
428 
429 			vkd.cmdPipelineBarrier(*cmdBuffer,
430 								   (VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
431 								   (VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
432 								   (VkDependencyFlags)0u,
433 								   0u,
434 								   (const VkMemoryBarrier*)DE_NULL,
435 								   0u,
436 								   (const VkBufferMemoryBarrier*)DE_NULL,
437 								   1u,
438 								   &postCopyBarrier);
439 		}
440 
441 	}
442 
443 	endCommandBuffer(vkd, *cmdBuffer);
444 
445 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
446 }
447 
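// Writes imageData directly into the image's host-mapped allocations using the row pitches
// reported by vkGetImageSubresourceLayout (i.e. for host-accessible images, e.g. linear tiling),
// then transitions the image from VK_IMAGE_LAYOUT_PREINITIALIZED to finalLayout.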
448 void fillImageMemory (const vk::DeviceInterface&							vkd,
449 					  vk::VkDevice											device,
450 					  deUint32												queueFamilyNdx,
451 					  vk::VkImage											image,
452 					  const std::vector<de::SharedPtr<vk::Allocation> >&	allocations,
453 					  const MultiPlaneImageData&							imageData,
454 					  vk::VkAccessFlags										nextAccess,
455 					  vk::VkImageLayout										finalLayout,
456 					  deUint32												arrayLayer)
457 {
458 	const VkQueue					queue			= getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
459 	const Unique<VkCommandPool>		cmdPool			(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
460 	const Unique<VkCommandBuffer>	cmdBuffer		(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
461 	const PlanarFormatDescription&	formatDesc		= imageData.getDescription();
462 
463 	for (deUint32 planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
464 	{
465 		const VkImageAspectFlagBits			aspect		= (formatDesc.numPlanes > 1)
466 														? getPlaneAspect(planeNdx)
467 														: VK_IMAGE_ASPECT_COLOR_BIT;
468 		const de::SharedPtr<Allocation>&	allocation	= allocations.size() > 1
469 														? allocations[planeNdx]
470 														: allocations[0];
471 		const size_t						planeSize	= imageData.getPlaneSize(planeNdx);
472 		const deUint32						planeH		= imageData.getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
473 		const VkImageSubresource			subresource	=
474 		{
475 			static_cast<vk::VkImageAspectFlags>(aspect),
476 			0u,
477 			arrayLayer,
478 		};
479 		VkSubresourceLayout			layout;
480 
481 		vkd.getImageSubresourceLayout(device, image, &subresource, &layout);
482 
483 		for (deUint32 row = 0; row < planeH; ++row)
484 		{
485 			const size_t		rowSize		= planeSize / planeH;
486 			void* const			dstPtr		= ((deUint8*)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
487 			const void* const	srcPtr		= ((const deUint8*)imageData.getPlanePtr(planeNdx)) + row * rowSize;
488 
489 			deMemcpy(dstPtr, srcPtr, rowSize);
490 		}
491 		flushMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
492 	}
493 
494 	beginCommandBuffer(vkd, *cmdBuffer);
495 
496 	{
497 		const VkImageMemoryBarrier		postCopyBarrier	=
498 		{
499 			VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
500 			DE_NULL,
501 			0u,
502 			nextAccess,
503 			VK_IMAGE_LAYOUT_PREINITIALIZED,
504 			finalLayout,
505 			VK_QUEUE_FAMILY_IGNORED,
506 			VK_QUEUE_FAMILY_IGNORED,
507 			image,
508 			{ VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, arrayLayer, 1u }
509 		};
510 
511 		vkd.cmdPipelineBarrier(*cmdBuffer,
512 								(VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
513 								(VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
514 								(VkDependencyFlags)0u,
515 								0u,
516 								(const VkMemoryBarrier*)DE_NULL,
517 								0u,
518 								(const VkBufferMemoryBarrier*)DE_NULL,
519 								1u,
520 								&postCopyBarrier);
521 	}
522 
523 	endCommandBuffer(vkd, *cmdBuffer);
524 
525 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
526 }
527 
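// Reads the image back plane by plane with vkCmdCopyImageToBuffer into staging buffers and
// then copies the staging contents into imageData.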
528 void downloadImage (const DeviceInterface&	vkd,
529 					VkDevice				device,
530 					deUint32				queueFamilyNdx,
531 					Allocator&				allocator,
532 					VkImage					image,
533 					MultiPlaneImageData*	imageData,
534 					VkAccessFlags			prevAccess,
535 					VkImageLayout			initialLayout)
536 {
537 	const VkQueue					queue			= getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
538 	const Unique<VkCommandPool>		cmdPool			(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
539 	const Unique<VkCommandBuffer>	cmdBuffer		(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
540 	vector<VkBufferSp>				stagingBuffers;
541 	vector<AllocationSp>			stagingMemory;
542 
543 	const PlanarFormatDescription&	formatDesc		= imageData->getDescription();
544 
545 	allocateStagingBuffers(vkd, device, allocator, *imageData, &stagingBuffers, &stagingMemory);
546 
547 	beginCommandBuffer(vkd, *cmdBuffer);
548 
549 	for (deUint32 planeNdx = 0; planeNdx < imageData->getDescription().numPlanes; ++planeNdx)
550 	{
551 		const VkImageAspectFlagBits	aspect	= (formatDesc.numPlanes > 1)
552 											? getPlaneAspect(planeNdx)
553 											: VK_IMAGE_ASPECT_COLOR_BIT;
554 		{
555 			const VkImageMemoryBarrier		preCopyBarrier	=
556 			{
557 				VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
558 				DE_NULL,
559 				prevAccess,
560 				VK_ACCESS_TRANSFER_READ_BIT,
561 				initialLayout,
562 				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
563 				VK_QUEUE_FAMILY_IGNORED,
564 				VK_QUEUE_FAMILY_IGNORED,
565 				image,
566 				{
567 					static_cast<vk::VkImageAspectFlags>(aspect),
568 					0u,
569 					1u,
570 					0u,
571 					1u
572 				}
573 			};
574 
575 			vkd.cmdPipelineBarrier(*cmdBuffer,
576 									(VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
577 									(VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
578 									(VkDependencyFlags)0u,
579 									0u,
580 									(const VkMemoryBarrier*)DE_NULL,
581 									0u,
582 									(const VkBufferMemoryBarrier*)DE_NULL,
583 									1u,
584 									&preCopyBarrier);
585 		}
586 		{
587 			const VkExtent3D imageExtent		= makeExtent3D(imageData->getSize().x(), imageData->getSize().y(), 1u);
588 			const VkExtent3D planeExtent		= getPlaneExtent(formatDesc, imageExtent, planeNdx, 0);
589 			const VkBufferImageCopy		copy	=
590 			{
591 				0u,		// bufferOffset
592 				0u,		// bufferRowLength
593 				0u,		// bufferImageHeight
594 				{ (VkImageAspectFlags)aspect, 0u, 0u, 1u },
595 				makeOffset3D(0u, 0u, 0u),
596 				planeExtent
597 			};
598 
599 			vkd.cmdCopyImageToBuffer(*cmdBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, **stagingBuffers[planeNdx], 1u, &copy);
600 		}
601 		{
602 			const VkBufferMemoryBarrier		postCopyBarrier	=
603 			{
604 				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
605 				DE_NULL,
606 				VK_ACCESS_TRANSFER_WRITE_BIT,
607 				VK_ACCESS_HOST_READ_BIT,
608 				VK_QUEUE_FAMILY_IGNORED,
609 				VK_QUEUE_FAMILY_IGNORED,
610 				**stagingBuffers[planeNdx],
611 				0u,
612 				VK_WHOLE_SIZE
613 			};
614 
615 			vkd.cmdPipelineBarrier(*cmdBuffer,
616 									(VkPipelineStageFlags)VK_PIPELINE_STAGE_TRANSFER_BIT,
617 									(VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
618 									(VkDependencyFlags)0u,
619 									0u,
620 									(const VkMemoryBarrier*)DE_NULL,
621 									1u,
622 									&postCopyBarrier,
623 									0u,
624 									(const VkImageMemoryBarrier*)DE_NULL);
625 		}
626 	}
627 
628 	endCommandBuffer(vkd, *cmdBuffer);
629 
630 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
631 
632 	readStagingBuffers(imageData, vkd, device, stagingMemory);
633 }
634 
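// Counterpart of fillImageMemory: transitions the image to VK_IMAGE_LAYOUT_GENERAL and reads
// each plane straight out of the host-mapped allocations using vkGetImageSubresourceLayout.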
635 void readImageMemory (const vk::DeviceInterface&							vkd,
636 					  vk::VkDevice											device,
637 					  deUint32												queueFamilyNdx,
638 					  vk::VkImage											image,
639 					  const std::vector<de::SharedPtr<vk::Allocation> >&	allocations,
640 					  MultiPlaneImageData*									imageData,
641 					  vk::VkAccessFlags										prevAccess,
642 					  vk::VkImageLayout										initialLayout)
643 {
644 	const VkQueue					queue			= getDeviceQueue(vkd, device, queueFamilyNdx, 0u);
645 	const Unique<VkCommandPool>		cmdPool			(createCommandPool(vkd, device, (VkCommandPoolCreateFlags)0, queueFamilyNdx));
646 	const Unique<VkCommandBuffer>	cmdBuffer		(allocateCommandBuffer(vkd, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
647 	const PlanarFormatDescription&	formatDesc		= imageData->getDescription();
648 
649 	beginCommandBuffer(vkd, *cmdBuffer);
650 
651 	{
652 		const VkImageMemoryBarrier		preCopyBarrier	=
653 		{
654 			VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
655 			DE_NULL,
656 			prevAccess,
657 			vk::VK_ACCESS_HOST_READ_BIT,
658 			initialLayout,
659 			VK_IMAGE_LAYOUT_GENERAL,
660 			VK_QUEUE_FAMILY_IGNORED,
661 			VK_QUEUE_FAMILY_IGNORED,
662 			image,
663 			{ VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u }
664 		};
665 
666 		vkd.cmdPipelineBarrier(*cmdBuffer,
667 								(VkPipelineStageFlags)VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
668 								(VkPipelineStageFlags)VK_PIPELINE_STAGE_HOST_BIT,
669 								(VkDependencyFlags)0u,
670 								0u,
671 								(const VkMemoryBarrier*)DE_NULL,
672 								0u,
673 								(const VkBufferMemoryBarrier*)DE_NULL,
674 								1u,
675 								&preCopyBarrier);
676 	}
677 
678 	endCommandBuffer(vkd, *cmdBuffer);
679 
680 	submitCommandsAndWait(vkd, device, queue, *cmdBuffer);
681 
682 	for (deUint32 planeNdx = 0; planeNdx < formatDesc.numPlanes; ++planeNdx)
683 	{
684 		const VkImageAspectFlagBits			aspect		= (formatDesc.numPlanes > 1)
685 														? getPlaneAspect(planeNdx)
686 														: VK_IMAGE_ASPECT_COLOR_BIT;
687 		const de::SharedPtr<Allocation>&	allocation	= allocations.size() > 1
688 														? allocations[planeNdx]
689 														: allocations[0];
690 		const size_t						planeSize	= imageData->getPlaneSize(planeNdx);
691 		const deUint32						planeH		= imageData->getSize().y() / formatDesc.planes[planeNdx].heightDivisor;
692 		const VkImageSubresource			subresource	=
693 		{
694 			static_cast<vk::VkImageAspectFlags>(aspect),
695 			0u,
696 			0u,
697 		};
698 		VkSubresourceLayout			layout;
699 
700 		vkd.getImageSubresourceLayout(device, image, &subresource, &layout);
701 
702 		invalidateMappedMemoryRange(vkd, device, allocation->getMemory(), 0u, VK_WHOLE_SIZE);
703 
704 		for (deUint32 row = 0; row < planeH; ++row)
705 		{
706 			const size_t		rowSize	= planeSize / planeH;
707 			const void* const	srcPtr	= ((const deUint8*)allocation->getHostPtr()) + layout.offset + layout.rowPitch * row;
708 			void* const			dstPtr	= ((deUint8*)imageData->getPlanePtr(planeNdx)) + row * rowSize;
709 
710 			deMemcpy(dstPtr, srcPtr, rowSize);
711 		}
712 	}
713 }
714 
715 // ChannelAccess utilities
716 namespace
717 {
718 
719 //! Extend < 32b signed integer to 32b
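// e.g. signExtend(0x3ffu, 10) == -1 and signExtend(0x1ffu, 10) == 511.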
720 inline deInt32 signExtend (deUint32 src, int bits)
721 {
722 	const deUint32 signBit = 1u << (bits-1);
723 
724 	src |= ~((src & signBit) - 1);
725 
726 	return (deInt32)src;
727 }
728 
729 deUint32 divRoundUp (deUint32 a, deUint32 b)
730 {
731 	if (a % b == 0)
732 		return a / b;
733 	else
734 		return (a / b) + 1;
735 }
736 
737 // \todo Taken from tcuTexture.cpp
738 // \todo [2011-09-21 pyry] Move to tcutil?
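// Round to nearest, ties to even, saturating to the target type's range:
// e.g. convertSatRte<deInt8>(2.5f) == 2, convertSatRte<deInt8>(3.5f) == 4,
// convertSatRte<deInt8>(300.0f) == 127.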
739 template <typename T>
740 inline T convertSatRte (float f)
741 {
742 	// \note Doesn't work for 64-bit types
743 	DE_STATIC_ASSERT(sizeof(T) < sizeof(deUint64));
744 	DE_STATIC_ASSERT((-3 % 2 != 0) && (-4 % 2 == 0));
745 
746 	deInt64	minVal	= std::numeric_limits<T>::min();
747 	deInt64 maxVal	= std::numeric_limits<T>::max();
748 	float	q		= deFloatFrac(f);
749 	deInt64 intVal	= (deInt64)(f-q);
750 
751 	// Rounding.
752 	if (q == 0.5f)
753 	{
754 		if (intVal % 2 != 0)
755 			intVal++;
756 	}
757 	else if (q > 0.5f)
758 		intVal++;
759 	// else Don't add anything
760 
761 	// Saturate.
762 	intVal = de::max(minVal, de::min(maxVal, intVal));
763 
764 	return (T)intVal;
765 }
766 
767 } // anonymous
768 
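// ChannelAccess addresses a single channel of a (possibly packed) plane at bit granularity:
// m_bitPitch gives the per-axis stride in bits and m_bitOffset the channel's offset within its
// element, so channels that are not byte aligned can still be read and written.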
769 ChannelAccess::ChannelAccess (tcu::TextureChannelClass	channelClass,
770 							  deUint8					channelSize,
771 							  const tcu::IVec3&			size,
772 							  const tcu::IVec3&			bitPitch,
773 							  void*						data,
774 							  deUint32					bitOffset)
775 	: m_channelClass	(channelClass)
776 	, m_channelSize		(channelSize)
777 	, m_size			(size)
778 	, m_bitPitch		(bitPitch)
779 	, m_data			((deUint8*)data + (bitOffset / 8))
780 	, m_bitOffset		(bitOffset % 8)
781 {
782 }
783 
784 deUint32 ChannelAccess::getChannelUint (const tcu::IVec3& pos) const
785 {
786 	DE_ASSERT(pos[0] < m_size[0]);
787 	DE_ASSERT(pos[1] < m_size[1]);
788 	DE_ASSERT(pos[2] < m_size[2]);
789 
790 	const deInt32			bitOffset	(m_bitOffset + tcu::dot(m_bitPitch, pos));
791 	const deUint8* const	firstByte	= ((const deUint8*)m_data) + (bitOffset / 8);
792 	const deUint32			byteCount	= divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
793 	const deUint32			mask		(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
794 	const deUint32			offset		= bitOffset % 8;
795 	deUint32				bits		= 0u;
796 
797 	deMemcpy(&bits, firstByte, byteCount);
798 
799 	return (bits >> offset) & mask;
800 }
801 
802 void ChannelAccess::setChannel (const tcu::IVec3& pos, deUint32 x)
803 {
804 	DE_ASSERT(pos[0] < m_size[0]);
805 	DE_ASSERT(pos[1] < m_size[1]);
806 	DE_ASSERT(pos[2] < m_size[2]);
807 
808 	const deInt32	bitOffset	(m_bitOffset + tcu::dot(m_bitPitch, pos));
809 	deUint8* const	firstByte	= ((deUint8*)m_data) + (bitOffset / 8);
810 	const deUint32	byteCount	= divRoundUp((bitOffset + m_channelSize) - 8u * (bitOffset / 8u), 8u);
811 	const deUint32	mask		(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
812 	const deUint32	offset		= bitOffset % 8;
813 
814 	const deUint32	bits		= (x & mask) << offset;
815 	deUint32		oldBits		= 0;
816 
817 	deMemcpy(&oldBits, firstByte, byteCount);
818 
819 	{
820 		const deUint32	newBits	= bits | (oldBits & (~(mask << offset)));
821 
822 		deMemcpy(firstByte, &newBits,  byteCount);
823 	}
824 }
825 
826 float ChannelAccess::getChannel (const tcu::IVec3& pos) const
827 {
828 	const deUint32	bits	(getChannelUint(pos));
829 
830 	switch (m_channelClass)
831 	{
832 		case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
833 			return (float)bits / (float)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u));
834 
835 		case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
836 			return (float)bits;
837 
838 		case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
839 			return de::max(-1.0f, (float)signExtend(bits, m_channelSize) / (float)((0x1u << (m_channelSize - 1u)) - 1u));
840 
841 		case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
842 			return (float)signExtend(bits, m_channelSize);
843 
844 		case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
845 			if (m_channelSize == 32)
846 				return tcu::Float32(bits).asFloat();
847 			else
848 			{
849 				DE_FATAL("Float type not supported");
850 				return -1.0f;
851 			}
852 
853 		default:
854 			DE_FATAL("Unknown texture channel class");
855 			return -1.0f;
856 	}
857 }
858 
859 tcu::Interval ChannelAccess::getChannel (const tcu::FloatFormat&	conversionFormat,
860 										 const tcu::IVec3&			pos) const
861 {
862 	const deUint32	bits	(getChannelUint(pos));
863 
864 	switch (m_channelClass)
865 	{
866 		case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
867 			return conversionFormat.roundOut(conversionFormat.roundOut((double)bits, false)
868 											/ conversionFormat.roundOut((double)(m_channelSize == 32 ? ~0x0u : ((0x1u << m_channelSize) - 1u)), false), false);
869 
870 		case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
871 			return conversionFormat.roundOut((double)bits, false);
872 
873 		case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
874 		{
875 			const tcu::Interval result (conversionFormat.roundOut(conversionFormat.roundOut((double)signExtend(bits, m_channelSize), false)
876 																/ conversionFormat.roundOut((double)((0x1u << (m_channelSize - 1u)) - 1u), false), false));
877 
878 			return tcu::Interval(de::max(-1.0, result.lo()), de::max(-1.0, result.hi()));
879 		}
880 
881 		case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
882 			return conversionFormat.roundOut((double)signExtend(bits, m_channelSize), false);
883 
884 		case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
885 			if (m_channelSize == 32)
886 				return conversionFormat.roundOut(tcu::Float32(bits).asFloat(), false);
887 			else
888 			{
889 				DE_FATAL("Float type not supported");
890 				return tcu::Interval();
891 			}
892 
893 		default:
894 			DE_FATAL("Unknown texture channel class");
895 			return tcu::Interval();
896 	}
897 }
898 
899 void ChannelAccess::setChannel (const tcu::IVec3& pos, float x)
900 {
901 	DE_ASSERT(pos[0] < m_size[0]);
902 	DE_ASSERT(pos[1] < m_size[1]);
903 	DE_ASSERT(pos[2] < m_size[2]);
904 
905 	const deUint32	mask	(m_channelSize == 32u ? ~0x0u : (0x1u << m_channelSize) - 1u);
906 
907 	switch (m_channelClass)
908 	{
909 		case tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT:
910 		{
911 			const deUint32	maxValue	(mask);
912 			const deUint32	value		(de::min(maxValue, (deUint32)convertSatRte<deUint32>(x * (float)maxValue)));
913 			setChannel(pos, value);
914 			break;
915 		}
916 
917 		case tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT:
918 		{
919 			const deInt32	range	((0x1u << (m_channelSize - 1u)) - 1u);
920 			const deUint32	value	((deUint32)de::clamp<deInt32>(convertSatRte<deInt32>(x * (float)range), -range, range));
921 			setChannel(pos, value);
922 			break;
923 		}
924 
925 		case tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER:
926 		{
927 			const deUint32	maxValue	(mask);
928 			const deUint32	value		(de::min(maxValue, (deUint32)x));
929 			setChannel(pos, value);
930 			break;
931 		}
932 
933 		case tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER:
934 		{
935 			const deInt32	minValue	(-(deInt32)(1u << (m_channelSize - 1u)));
936 			const deInt32	maxValue	((deInt32)((1u << (m_channelSize - 1u)) - 1u));
937 			const deUint32	value		((deUint32)de::clamp((deInt32)x, minValue, maxValue));
938 			setChannel(pos, value);
939 			break;
940 		}
941 
942 		case tcu::TEXTURECHANNELCLASS_FLOATING_POINT:
943 		{
944 			if (m_channelSize == 32)
945 			{
946 				const deUint32	value		= tcu::Float32(x).bits();
947 				setChannel(pos, value);
948 			}
949 			else
950 				DE_FATAL("Float type not supported");
951 			break;
952 		}
953 
954 		default:
955 			DE_FATAL("Unknown texture channel class");
956 	}
957 }
958 
959 ChannelAccess getChannelAccess (MultiPlaneImageData&				data,
960 								const vk::PlanarFormatDescription&	formatInfo,
961 								const UVec2&						size,
962 								int									channelNdx)
963 {
964 	DE_ASSERT(formatInfo.hasChannelNdx(channelNdx));
965 
966 	const deUint32	planeNdx			= formatInfo.channels[channelNdx].planeNdx;
967 	const deUint32	valueOffsetBits		= formatInfo.channels[channelNdx].offsetBits;
968 	const deUint32	pixelStrideBytes	= formatInfo.channels[channelNdx].strideBytes;
969 	const deUint32	pixelStrideBits		= pixelStrideBytes * 8;
970 	const deUint8	sizeBits			= formatInfo.channels[channelNdx].sizeBits;
971 
972 	DE_ASSERT(size.x() % (formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor) == 0);
973 	DE_ASSERT(size.y() % (formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor) == 0);
974 
975 	deUint32		accessWidth			= size.x() / ( formatInfo.blockWidth * formatInfo.planes[planeNdx].widthDivisor );
976 	const deUint32	accessHeight		= size.y() / ( formatInfo.blockHeight * formatInfo.planes[planeNdx].heightDivisor );
977 	const deUint32	elementSizeBytes	= formatInfo.planes[planeNdx].elementSizeBytes;
978 	const deUint32	rowPitch			= formatInfo.planes[planeNdx].elementSizeBytes * accessWidth;
979 	const deUint32	rowPitchBits		= rowPitch * 8;
980 
981 	if (pixelStrideBytes != elementSizeBytes)
982 	{
983 		DE_ASSERT(elementSizeBytes % pixelStrideBytes == 0);
984 		accessWidth *= elementSizeBytes/pixelStrideBytes;
985 	}
986 
987 	return ChannelAccess((tcu::TextureChannelClass)formatInfo.channels[channelNdx].type, sizeBits, tcu::IVec3(accessWidth, accessHeight, 1u), tcu::IVec3((int)pixelStrideBits, (int)rowPitchBits, 0), data.getPlanePtr(planeNdx), (deUint32)valueOffsetBits);
988 }
989 
990 bool isXChromaSubsampled (vk::VkFormat format)
991 {
992 	switch (format)
993 	{
994 		case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
995 		case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
996 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
997 		case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
998 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
999 		case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1000 		case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1001 		case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1002 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1003 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1004 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1005 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1006 		case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1007 		case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1008 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1009 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1010 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1011 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1012 		case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1013 		case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1014 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1015 		case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1016 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1017 		case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1018 			return true;
1019 
1020 		default:
1021 			return false;
1022 	}
1023 }
1024 
1025 bool isYChromaSubsampled (vk::VkFormat format)
1026 {
1027 	switch (format)
1028 	{
1029 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1030 		case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1031 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1032 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1033 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1034 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1035 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1036 		case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1037 			return true;
1038 
1039 		default:
1040 			return false;
1041 	}
1042 }
1043 
1044 bool areLsb6BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1045 {
1046 	if ((srcFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16)	                        ||
1047 		(dstFormat == vk::VK_FORMAT_R10X6_UNORM_PACK16)                         ||
1048 		(srcFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16)                   ||
1049 		(dstFormat == vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16)                   ||
1050 		(srcFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16)         ||
1051 		(dstFormat == vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16)         ||
1052 		(srcFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16)     ||
1053 		(dstFormat == vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16)     ||
1054 		(srcFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16)     ||
1055 		(dstFormat == vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16)     ||
1056 		(srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16)  ||
1057 		(dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16)  ||
1058 		(srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
1059 		(dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16) ||
1060 		(srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
1061 		(dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16) ||
1062 		(srcFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16)  ||
1063 		(dstFormat == vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16)  ||
1064 		(srcFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16) ||
1065 		(dstFormat == vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16))
1066 	{
1067 		return true;
1068 	}
1069 
1070 	return false;
1071 }
1072 
1073 bool areLsb4BitsDontCare(vk::VkFormat srcFormat, vk::VkFormat dstFormat)
1074 {
1075 	if ((srcFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16)                         ||
1076 		(dstFormat == vk::VK_FORMAT_R12X4_UNORM_PACK16)                         ||
1077 		(srcFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16)                   ||
1078 		(dstFormat == vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16)                   ||
1079 		(srcFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16)         ||
1080 		(dstFormat == vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16)         ||
1081 		(srcFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16)     ||
1082 		(dstFormat == vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16)     ||
1083 		(srcFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16)     ||
1084 		(dstFormat == vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16)     ||
1085 		(srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1086 		(dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16) ||
1087 		(srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16)  ||
1088 		(dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16)  ||
1089 		(srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1090 		(dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16) ||
1091 		(srcFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16)  ||
1092 		(dstFormat == vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16)  ||
1093 		(srcFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16) ||
1094 		(dstFormat == vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16))
1095 	{
1096 		return true;
1097 	}
1098 
1099 	return false;
1100 }
1101 
1102 // \note Used for range expansion
1103 tcu::UVec4 getYCbCrBitDepth (vk::VkFormat format)
1104 {
1105 	switch (format)
1106 	{
1107 		case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1108 		case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1109 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1110 		case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1111 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1112 		case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1113 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
1114 		case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
1115 			return tcu::UVec4(8, 8, 8, 0);
1116 
1117 		case vk::VK_FORMAT_R10X6_UNORM_PACK16:
1118 			return tcu::UVec4(10, 0, 0, 0);
1119 
1120 		case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
1121 			return tcu::UVec4(10, 10, 0, 0);
1122 
1123 		case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
1124 			return tcu::UVec4(10, 10, 10, 10);
1125 
1126 		case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1127 		case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1128 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1129 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1130 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1131 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1132 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
1133 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
1134 			return tcu::UVec4(10, 10, 10, 0);
1135 
1136 		case vk::VK_FORMAT_R12X4_UNORM_PACK16:
1137 			return tcu::UVec4(12, 0, 0, 0);
1138 
1139 		case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
1140 			return tcu::UVec4(12, 12, 0, 0);
1141 
1142 		case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
1143 		case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1144 		case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1145 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1146 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1147 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1148 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1149 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
1150 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
1151 			return tcu::UVec4(12, 12, 12, 12);
1152 
1153 		case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1154 		case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1155 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1156 		case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1157 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1158 		case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1159 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
1160 		case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
1161 			return tcu::UVec4(16, 16, 16, 0);
1162 
1163 		default:
1164 			return tcu::getTextureFormatBitDepth(vk::mapVkFormat(format)).cast<deUint32>();
1165 	}
1166 }
1167 
1168 std::vector<tcu::FloatFormat> getPrecision (VkFormat format)
1169 {
1170 	std::vector<FloatFormat>	floatFormats;
1171 	UVec4						channelDepth	= getYCbCrBitDepth (format);
1172 
1173 	for (deUint32 channelIdx = 0; channelIdx < 4; channelIdx++)
1174 		floatFormats.push_back(tcu::FloatFormat(0, 0, channelDepth[channelIdx], false, tcu::YES));
1175 
1176 	return floatFormats;
1177 }
1178 
1179 deUint32 getYCbCrFormatChannelCount (vk::VkFormat format)
1180 {
1181 	switch (format)
1182 	{
1183 		case vk::VK_FORMAT_A1R5G5B5_UNORM_PACK16:
1184 		case vk::VK_FORMAT_A2B10G10R10_UNORM_PACK32:
1185 		case vk::VK_FORMAT_A2R10G10B10_UNORM_PACK32:
1186 		case vk::VK_FORMAT_A8B8G8R8_UNORM_PACK32:
1187 		case vk::VK_FORMAT_B4G4R4A4_UNORM_PACK16:
1188 		case vk::VK_FORMAT_B5G5R5A1_UNORM_PACK16:
1189 		case vk::VK_FORMAT_B8G8R8A8_UNORM:
1190 		case vk::VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
1191 		case vk::VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
1192 		case vk::VK_FORMAT_R16G16B16A16_UNORM:
1193 		case vk::VK_FORMAT_R4G4B4A4_UNORM_PACK16:
1194 		case vk::VK_FORMAT_R5G5B5A1_UNORM_PACK16:
1195 		case vk::VK_FORMAT_R8G8B8A8_UNORM:
1196 			return 4;
1197 
1198 		case vk::VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
1199 		case vk::VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
1200 		case vk::VK_FORMAT_B16G16R16G16_422_UNORM:
1201 		case vk::VK_FORMAT_B5G6R5_UNORM_PACK16:
1202 		case vk::VK_FORMAT_B8G8R8G8_422_UNORM:
1203 		case vk::VK_FORMAT_B8G8R8_UNORM:
1204 		case vk::VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
1205 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
1206 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
1207 		case vk::VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16_EXT:
1208 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
1209 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
1210 		case vk::VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
1211 		case vk::VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
1212 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
1213 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
1214 		case vk::VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16_EXT:
1215 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
1216 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
1217 		case vk::VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
1218 		case vk::VK_FORMAT_G16B16G16R16_422_UNORM:
1219 		case vk::VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
1220 		case vk::VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
1221 		case vk::VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT:
1222 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
1223 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
1224 		case vk::VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
1225 		case vk::VK_FORMAT_G8B8G8R8_422_UNORM:
1226 		case vk::VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1227 		case vk::VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
1228 		case vk::VK_FORMAT_G8_B8R8_2PLANE_444_UNORM_EXT:
1229 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1230 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
1231 		case vk::VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
1232 		case vk::VK_FORMAT_R16G16B16_UNORM:
1233 		case vk::VK_FORMAT_R5G6B5_UNORM_PACK16:
1234 		case vk::VK_FORMAT_R8G8B8_UNORM:
1235 			return 3;
1236 
1237 		case vk::VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
1238 		case vk::VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
1239 			return 2;
1240 
1241 		case vk::VK_FORMAT_R10X6_UNORM_PACK16:
1242 		case vk::VK_FORMAT_R12X4_UNORM_PACK16:
1243 			return 1;
1244 
1245 		default:
1246 			DE_FATAL("Unknown number of channels");
1247 			return -1;
1248 	}
1249 }
1250 
1251 // YCbCr color conversion utilities
1252 namespace
1253 {
1254 
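// The narrow ("ITU limited") range expansion below follows the standard formulas, with n the
// channel bit depth and S the normalized sample value:
//   luma:   (S * (2^n - 1) - 16 * 2^(n-8)) / (219 * 2^(n-8))
//   chroma: (S * (2^n - 1) - 128 * 2^(n-8)) / (224 * 2^(n-8))
// Full range leaves luma unchanged and only re-centers chroma around zero.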
1255 tcu::Interval rangeExpandChroma (vk::VkSamplerYcbcrRange		range,
1256 								 const tcu::FloatFormat&		conversionFormat,
1257 								 const deUint32					bits,
1258 								 const tcu::Interval&			sample)
1259 {
1260 	const deUint32	values	(0x1u << bits);
1261 
1262 	switch (range)
1263 	{
1264 		case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
1265 			return conversionFormat.roundOut(sample - conversionFormat.roundOut(tcu::Interval((double)(0x1u << (bits - 1u)) / (double)((0x1u << bits) - 1u)), false), false);
1266 
1267 		case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
1268 		{
1269 			const tcu::Interval	a			(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
1270 			const tcu::Interval	dividend	(conversionFormat.roundOut(a - tcu::Interval((double)(128u * (0x1u << (bits - 8u)))), false));
1271 			const tcu::Interval	divisor		((double)(224u * (0x1u << (bits - 8u))));
1272 			const tcu::Interval	result		(conversionFormat.roundOut(dividend / divisor, false));
1273 
1274 			return result;
1275 		}
1276 
1277 		default:
1278 			DE_FATAL("Unknown YCbCrRange");
1279 			return tcu::Interval();
1280 	}
1281 }
1282 
1283 tcu::Interval rangeExpandLuma (vk::VkSamplerYcbcrRange		range,
1284 							   const tcu::FloatFormat&		conversionFormat,
1285 							   const deUint32				bits,
1286 							   const tcu::Interval&			sample)
1287 {
1288 	const deUint32	values	(0x1u << bits);
1289 
1290 	switch (range)
1291 	{
1292 		case vk::VK_SAMPLER_YCBCR_RANGE_ITU_FULL:
1293 			return conversionFormat.roundOut(sample, false);
1294 
1295 		case vk::VK_SAMPLER_YCBCR_RANGE_ITU_NARROW:
1296 		{
1297 			const tcu::Interval	a			(conversionFormat.roundOut(sample * tcu::Interval((double)(values - 1u)), false));
1298 			const tcu::Interval	dividend	(conversionFormat.roundOut(a - tcu::Interval((double)(16u * (0x1u << (bits - 8u)))), false));
1299 			const tcu::Interval	divisor		((double)(219u * (0x1u << (bits - 8u))));
1300 			const tcu::Interval	result		(conversionFormat.roundOut(dividend / divisor, false));
1301 
1302 			return result;
1303 		}
1304 
1305 		default:
1306 			DE_FATAL("Unknown YCbCrRange");
1307 			return tcu::Interval();
1308 	}
1309 }
1310 
1311 tcu::Interval clampMaybe (const tcu::Interval&	x,
1312 						  double				min,
1313 						  double				max)
1314 {
1315 	tcu::Interval result = x;
1316 
1317 	DE_ASSERT(min <= max);
1318 
1319 	if (x.lo() < min)
1320 		result = result | tcu::Interval(min);
1321 
1322 	if (x.hi() > max)
1323 		result = result | tcu::Interval(max);
1324 
1325 	return result;
1326 }
1327 
1328 void convertColor (vk::VkSamplerYcbcrModelConversion	colorModel,
1329 				   vk::VkSamplerYcbcrRange				range,
1330 				   const vector<tcu::FloatFormat>&		conversionFormat,
1331 				   const tcu::UVec4&					bitDepth,
1332 				   const tcu::Interval					input[4],
1333 				   tcu::Interval						output[4])
1334 {
1335 	switch (colorModel)
1336 	{
1337 		case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY:
1338 		{
1339 			for (size_t ndx = 0; ndx < 4; ndx++)
1340 				output[ndx] = input[ndx];
1341 			break;
1342 		}
1343 
1344 		case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY:
1345 		{
1346 			output[0] = clampMaybe(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]), -0.5, 0.5);
1347 			output[1] = clampMaybe(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]), 0.0, 1.0);
1348 			output[2] = clampMaybe(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]), -0.5, 0.5);
1349 			output[3] = input[3];
1350 			break;
1351 		}
1352 
1353 		case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601:
1354 		case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709:
1355 		case vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_2020:
1356 		{
1357 			const tcu::Interval	y			(rangeExpandLuma(range, conversionFormat[1], bitDepth[1], input[1]));
1358 			const tcu::Interval	cr			(rangeExpandChroma(range, conversionFormat[0], bitDepth[0], input[0]));
1359 			const tcu::Interval	cb			(rangeExpandChroma(range, conversionFormat[2], bitDepth[2], input[2]));
1360 
1361 			const tcu::Interval	yClamped	(clampMaybe(y,   0.0, 1.0));
1362 			const tcu::Interval	crClamped	(clampMaybe(cr, -0.5, 0.5));
1363 			const tcu::Interval	cbClamped	(clampMaybe(cb, -0.5, 0.5));
1364 
1365 			if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_601)
1366 			{
1367 				output[0] = conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.402 * crClamped, false), false);
1368 				output[1] = conversionFormat[1].roundOut(conversionFormat[1].roundOut(yClamped - conversionFormat[1].roundOut((0.202008 / 0.587) * cbClamped, false), false) - conversionFormat[1].roundOut((0.419198 / 0.587) * crClamped, false), false);
1369 				output[2] = conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.772 * cbClamped, false), false);
1370 			}
1371 			else if (colorModel == vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_709)
1372 			{
1373 				output[0] = conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.5748 * crClamped, false), false);
1374 				output[1] = conversionFormat[1].roundOut(conversionFormat[1].roundOut(yClamped - conversionFormat[1].roundOut((0.13397432 / 0.7152) * cbClamped, false), false) - conversionFormat[1].roundOut((0.33480248 / 0.7152) * crClamped, false), false);
1375 				output[2] = conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8556 * cbClamped, false), false);
1376 			}
1377 			else
1378 			{
1379 				output[0] = conversionFormat[0].roundOut(yClamped + conversionFormat[0].roundOut(1.4746 * crClamped, false), false);
1380 				output[1] = conversionFormat[1].roundOut(conversionFormat[1].roundOut(yClamped - conversionFormat[1].roundOut(conversionFormat[1].roundOut(0.11156702 / 0.6780, false) * cbClamped, false), false) - conversionFormat[1].roundOut(conversionFormat[1].roundOut(0.38737742 / 0.6780, false) * crClamped, false), false);
1381 				output[2] = conversionFormat[2].roundOut(yClamped + conversionFormat[2].roundOut(1.8814 * cbClamped, false), false);
1382 			}
1383 			output[3] = input[3];
1384 			break;
1385 		}
1386 
1387 		default:
1388 			DE_FATAL("Unknown YCbCrModel");
1389 	}
1390 
1391 	if (colorModel != vk::VK_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY)
1392 	{
1393 		for (int ndx = 0; ndx < 3; ndx++)
1394 			output[ndx] = clampMaybe(output[ndx], 0.0, 1.0);
1395 	}
1396 }
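// Minimal worked example (illustrative, BT.601 branch, ignoring interval round-out),
// with input[0] = cr, input[1] = y, input[2] = cb already range-expanded to
// cr = 0.1, y = 0.5, cb = -0.1:
//   R = 0.5 + 1.402 * 0.1                                            ~= 0.6402
//   G = 0.5 - (0.202008 / 0.587) * (-0.1) - (0.419198 / 0.587) * 0.1 ~= 0.4630
//   B = 0.5 + 1.772 * (-0.1)                                         ~= 0.3228
// For every model except YCBCR_IDENTITY the RGB outputs are then clamped to [0, 1].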
1397 
1398 int mirror (int coord)
1399 {
1400 	return coord >= 0 ? coord : -(1 + coord);
1401 }
1402 
1403 int imod (int a, int b)
1404 {
1405 	int m = a % b;
1406 	return m < 0 ? m + b : m;
1407 }
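// Quick reference values (illustrative):
//   mirror(-1) == 0, mirror(-2) == 1, mirror(3) == 3
//   imod(-1, 4) == 3, imod(5, 4) == 1   (the remainder is always non-negative)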
1408 
1409 tcu::Interval frac (const tcu::Interval& x)
1410 {
1411 	if (x.hi() - x.lo() >= 1.0)
1412 		return tcu::Interval(0.0, 1.0);
1413 	else
1414 	{
1415 		const tcu::Interval ret (deFrac(x.lo()), deFrac(x.hi()));
1416 
1417 		return ret;
1418 	}
1419 }
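// Examples (illustrative): frac([2.25, 2.5]) == [0.25, 0.5], while any interval at
// least one unit wide, e.g. [0.3, 1.7], conservatively yields [0.0, 1.0].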
1420 
1421 tcu::Interval calculateUV (const tcu::FloatFormat&	coordFormat,
1422 						   const tcu::Interval&		st,
1423 						   const int				size)
1424 {
1425 	return coordFormat.roundOut(coordFormat.roundOut(st, false) * tcu::Interval((double)size), false);
1426 }
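// Example (illustrative): st = 0.25 on an 8-texel wide plane gives uv ~= 2.0 in
// unnormalized texel coordinates, widened slightly by the coordFormat round-out.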
1427 
1428 tcu::IVec2 calculateNearestIJRange (const tcu::FloatFormat&	coordFormat,
1429 								    const tcu::Interval&	uv)
1430 {
1431 	const tcu::Interval	ij	(coordFormat.roundOut(coordFormat.roundOut(uv, false) - tcu::Interval(0.5), false));
1432 
1433 	return tcu::IVec2(deRoundToInt32(ij.lo() - coordFormat.ulp(ij.lo(), 1)), deRoundToInt32(ij.hi() + coordFormat.ulp(ij.hi(), 1)));
1434 }
1435 
1436 // Calculate range of pixel coordinates that can be used as lower coordinate for linear sampling
1437 tcu::IVec2 calculateLinearIJRange (const tcu::FloatFormat&	coordFormat,
1438 								   const tcu::Interval&		uv)
1439 {
1440 	const tcu::Interval	ij	(coordFormat.roundOut(uv - tcu::Interval(0.5), false));
1441 
1442 	return tcu::IVec2(deFloorToInt32(ij.lo()), deFloorToInt32(ij.hi()));
1443 }
1444 
1445 tcu::IVec2 calculateIJRange (vk::VkFilter				filter,
1446 							 const tcu::FloatFormat&	coordFormat,
1447 							 const tcu::Interval&		uv)
1448 {
1449 	DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1450 	return (filter == vk::VK_FILTER_LINEAR)	? calculateLinearIJRange(coordFormat, uv)
1451 											: calculateNearestIJRange(coordFormat, uv);
1452 }
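// Example (illustrative): for uv = [2.4, 2.6], uv - 0.5 = [1.9, 2.1], so the linear
// path returns the floored endpoints (1, 2), while the nearest path rounds the
// ulp-widened endpoints and returns (2, 2).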
1453 
1454 tcu::Interval calculateAB (const deUint32		subTexelPrecisionBits,
1455 						   const tcu::Interval&	uv,
1456 						   int					ij)
1457 {
1458 	const deUint32		subdivisions	= 0x1u << subTexelPrecisionBits;
1459 	const tcu::Interval	ab				(frac((uv - 0.5) & tcu::Interval((double)ij, (double)(ij + 1))));
1460 	const tcu::Interval	gridAB			(ab * tcu::Interval(subdivisions));
1461 	const tcu::Interval	rounded			(de::max(deFloor(gridAB.lo()) / subdivisions, 0.0) , de::min(deCeil(gridAB.hi()) / subdivisions, 1.0));
1462 
1463 	return rounded;
1464 }
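// Worked example (illustrative), subTexelPrecisionBits = 4, i.e. 16 subdivisions:
// uv = 2.3 and ij = 1 gives frac((2.3 - 0.5) & [1, 2]) ~= 0.8, gridAB ~= 12.8, and a
// returned weight interval of [12/16, 13/16] = [0.75, 0.8125], i.e. the weight is
// snapped outward onto the sub-texel grid.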
1465 
1466 tcu::Interval lookupWrapped (const ChannelAccess&		access,
1467 							 const tcu::FloatFormat&	conversionFormat,
1468 							 vk::VkSamplerAddressMode	addressModeU,
1469 							 vk::VkSamplerAddressMode	addressModeV,
1470 							 const tcu::IVec2&			coord)
1471 {
1472 	return access.getChannel(conversionFormat,
1473 							 tcu::IVec3(wrap(addressModeU, coord.x(), access.getSize().x()), wrap(addressModeV, coord.y(), access.getSize().y()), 0));
1474 }
1475 
1476 tcu::Interval linearInterpolate (const tcu::FloatFormat&	filteringFormat,
1477 								 const tcu::Interval&		a,
1478 								 const tcu::Interval&		b,
1479 								 const tcu::Interval&		p00,
1480 								 const tcu::Interval&		p10,
1481 								 const tcu::Interval&		p01,
1482 								 const tcu::Interval&		p11)
1483 {
1484 	const tcu::Interval	p[4] =
1485 	{
1486 		p00,
1487 		p10,
1488 		p01,
1489 		p11
1490 	};
1491 	tcu::Interval		result	(0.0);
1492 
1493 	for (size_t ndx = 0; ndx < 4; ndx++)
1494 	{
1495 		const tcu::Interval	weightA	(filteringFormat.roundOut((ndx % 2) == 0 ? (1.0 - a) : a, false));
1496 		const tcu::Interval	weightB	(filteringFormat.roundOut((ndx / 2) == 0 ? (1.0 - b) : b, false));
1497 		const tcu::Interval	weight	(filteringFormat.roundOut(weightA * weightB, false));
1498 
1499 		result = filteringFormat.roundOut(result + filteringFormat.roundOut(p[ndx] * weight, false), false);
1500 	}
1501 
1502 	return result;
1503 }
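// The loop above evaluates the usual bilinear blend in interval arithmetic, with a
// round-out after every operation:
//   result = (1-a)(1-b)*p00 + a(1-b)*p10 + (1-a)b*p01 + a*b*p11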
1504 
1505 tcu::Interval calculateImplicitChromaUV (const tcu::FloatFormat&	coordFormat,
1506 										 vk::VkChromaLocation		offset,
1507 										 const tcu::Interval&		uv)
1508 {
1509 	if (offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN)
1510 		return coordFormat.roundOut(0.5 * coordFormat.roundOut(uv + 0.5, false), false);
1511 	else
1512 		return coordFormat.roundOut(0.5 * uv, false);
1513 }
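// Example (illustrative): for luma coordinate uv = 3.0 the implicit chroma coordinate is
//   COSITED_EVEN: 0.5 * (3.0 + 0.5) = 1.75
//   MIDPOINT:     0.5 *  3.0        = 1.5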
1514 
1515 tcu::Interval linearSample (const ChannelAccess&		access,
1516 						    const tcu::FloatFormat&		conversionFormat,
1517 						    const tcu::FloatFormat&		filteringFormat,
1518 						    vk::VkSamplerAddressMode	addressModeU,
1519 						    vk::VkSamplerAddressMode	addressModeV,
1520 						    const tcu::IVec2&			coord,
1521 						    const tcu::Interval&		a,
1522 						    const tcu::Interval&		b)
1523 {
1524 	return linearInterpolate(filteringFormat, a, b,
1525 									lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 0)),
1526 									lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 0)),
1527 									lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(0, 1)),
1528 									lookupWrapped(access, conversionFormat, addressModeU, addressModeV, coord + tcu::IVec2(1, 1)));
1529 }
1530 
1531 tcu::Interval reconstructLinearXChromaSample (const tcu::FloatFormat&	filteringFormat,
1532 											  const tcu::FloatFormat&	conversionFormat,
1533 											  vk::VkChromaLocation		offset,
1534 											  vk::VkSamplerAddressMode	addressModeU,
1535 											  vk::VkSamplerAddressMode	addressModeV,
1536 											  const ChannelAccess&		access,
1537 											  int						i,
1538 											  int						j)
1539 {
1540 	const int subI	= offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN
1541 					? divFloor(i, 2)
1542 					: (i % 2 == 0 ? divFloor(i, 2) - 1 : divFloor(i, 2));
1543 	const double a	= offset == vk::VK_CHROMA_LOCATION_COSITED_EVEN
1544 					? (i % 2 == 0 ? 0.0 : 0.5)
1545 					: (i % 2 == 0 ? 0.25 : 0.75);
1546 
1547 	const tcu::Interval A (filteringFormat.roundOut(       a  * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI, j)), false));
1548 	const tcu::Interval B (filteringFormat.roundOut((1.0 - a) * lookupWrapped(access, conversionFormat, addressModeU, addressModeV, tcu::IVec2(subI + 1, j)), false));
1549 	return filteringFormat.roundOut(A + B, false);
1550 }
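// Tap positions and weights used above (read directly from the code):
//   COSITED_EVEN: subI = divFloor(i, 2),                     a = (i even ? 0.0  : 0.5)
//   MIDPOINT:     subI = divFloor(i, 2) - (i even ? 1 : 0),  a = (i even ? 0.25 : 0.75)
// Note that here `a` weights the subI tap directly (A = a * tap(subI)), whereas
// reconstructLinearXYChromaSample() below feeds the same `a` through
// linearInterpolate(), which weights the subI tap by (1 - a).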
1551 
1552 tcu::Interval reconstructLinearXYChromaSample (const tcu::FloatFormat&	filteringFormat,
1553 										  const tcu::FloatFormat&		conversionFormat,
1554 										  vk::VkChromaLocation			xOffset,
1555 										  vk::VkChromaLocation			yOffset,
1556 										  vk::VkSamplerAddressMode		addressModeU,
1557 										  vk::VkSamplerAddressMode		addressModeV,
1558 										  const ChannelAccess&			access,
1559 										  int							i,
1560 										  int							j)
1561 {
1562 	const int		subI	= xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN
1563 							? divFloor(i, 2)
1564 							: (i % 2 == 0 ? divFloor(i, 2) - 1 : divFloor(i, 2));
1565 	const int		subJ	= yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN
1566 							? divFloor(j, 2)
1567 							: (j % 2 == 0 ? divFloor(j, 2) - 1 : divFloor(j, 2));
1568 
1569 	const double	a		= xOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN
1570 							? (i % 2 == 0 ? 0.0 : 0.5)
1571 							: (i % 2 == 0 ? 0.25 : 0.75);
1572 	const double	b		= yOffset == vk::VK_CHROMA_LOCATION_COSITED_EVEN
1573 							? (j % 2 == 0 ? 0.0 : 0.5)
1574 							: (j % 2 == 0 ? 0.25 : 0.75);
1575 
1576 	return linearSample(access, conversionFormat, filteringFormat, addressModeU, addressModeV, tcu::IVec2(subI, subJ), a, b);
1577 }
1578 
1579 const ChannelAccess& swizzle (vk::VkComponentSwizzle	swizzle,
1580 							  const ChannelAccess&		identityPlane,
1581 							  const ChannelAccess&		rPlane,
1582 							  const ChannelAccess&		gPlane,
1583 							  const ChannelAccess&		bPlane,
1584 							  const ChannelAccess&		aPlane)
1585 {
1586 	switch (swizzle)
1587 	{
1588 		case vk::VK_COMPONENT_SWIZZLE_IDENTITY:	return identityPlane;
1589 		case vk::VK_COMPONENT_SWIZZLE_R:		return rPlane;
1590 		case vk::VK_COMPONENT_SWIZZLE_G:		return gPlane;
1591 		case vk::VK_COMPONENT_SWIZZLE_B:		return bPlane;
1592 		case vk::VK_COMPONENT_SWIZZLE_A:		return aPlane;
1593 
1594 		default:
1595 			DE_FATAL("Unsupported swizzle");
1596 			return identityPlane;
1597 	}
1598 }
1599 
1600 } // anonymous
1601 
1602 int wrap (vk::VkSamplerAddressMode	addressMode,
1603 		  int						coord,
1604 		  int						size)
1605 {
1606 	switch (addressMode)
1607 	{
1608 		case vk::VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
1609 			return (size - 1) - mirror(imod(coord, 2 * size) - size);
1610 
1611 		case vk::VK_SAMPLER_ADDRESS_MODE_REPEAT:
1612 			return imod(coord, size);
1613 
1614 		case vk::VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
1615 			return de::clamp(coord, 0, size - 1);
1616 
1617 		case vk::VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
1618 			return de::clamp(mirror(coord), 0, size - 1);
1619 
1620 		default:
1621 			DE_FATAL("Unknown wrap mode");
1622 			return ~0;
1623 	}
1624 }
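// Example values for a 4-texel wide plane (illustrative):
//   REPEAT:               wrap(-1) == 3, wrap(4) == 0
//   MIRRORED_REPEAT:      wrap(-1) == 0, wrap(4) == 3
//   CLAMP_TO_EDGE:        wrap(-1) == 0, wrap(5) == 3
//   MIRROR_CLAMP_TO_EDGE: wrap(-2) == 1, wrap(5) == 3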
1625 
1626 int divFloor (int a, int b)
1627 {
1628 	if (a % b == 0)
1629 		return a / b;
1630 	else if (a > 0)
1631 		return a / b;
1632 	else
1633 		return (a / b) - 1;
1634 }
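// Unlike plain integer division, divFloor() rounds toward negative infinity:
//   divFloor(-3, 2) == -2   (whereas -3 / 2 == -1 in C++)
//   divFloor( 3, 2) ==  1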
1635 
1636 void calculateBounds (const ChannelAccess&				rPlane,
1637 					  const ChannelAccess&				gPlane,
1638 					  const ChannelAccess&				bPlane,
1639 					  const ChannelAccess&				aPlane,
1640 					  const UVec4&						bitDepth,
1641 					  const vector<Vec2>&				sts,
1642 					  const vector<FloatFormat>&		filteringFormat,
1643 					  const vector<FloatFormat>&		conversionFormat,
1644 					  const deUint32					subTexelPrecisionBits,
1645 					  vk::VkFilter						filter,
1646 					  vk::VkSamplerYcbcrModelConversion	colorModel,
1647 					  vk::VkSamplerYcbcrRange			range,
1648 					  vk::VkFilter						chromaFilter,
1649 					  vk::VkChromaLocation				xChromaOffset,
1650 					  vk::VkChromaLocation				yChromaOffset,
1651 					  const vk::VkComponentMapping&		componentMapping,
1652 					  bool								explicitReconstruction,
1653 					  vk::VkSamplerAddressMode			addressModeU,
1654 					  vk::VkSamplerAddressMode			addressModeV,
1655 					  std::vector<Vec4>&				minBounds,
1656 					  std::vector<Vec4>&				maxBounds,
1657 					  std::vector<Vec4>&				uvBounds,
1658 					  std::vector<IVec4>&				ijBounds)
1659 {
1660 	const FloatFormat		highp			(-126, 127, 23, true,
1661 											 tcu::MAYBE,	// subnormals
1662 											 tcu::YES,		// infinities
1663 											 tcu::MAYBE);	// NaN
1664 	const FloatFormat		coordFormat		(-32, 32, 16, true);
1665 	const ChannelAccess&	rAccess			(swizzle(componentMapping.r, rPlane, rPlane, gPlane, bPlane, aPlane));
1666 	const ChannelAccess&	gAccess			(swizzle(componentMapping.g, gPlane, rPlane, gPlane, bPlane, aPlane));
1667 	const ChannelAccess&	bAccess			(swizzle(componentMapping.b, bPlane, rPlane, gPlane, bPlane, aPlane));
1668 	const ChannelAccess&	aAccess			(swizzle(componentMapping.a, aPlane, rPlane, gPlane, bPlane, aPlane));
1669 
1670 	const bool				subsampledX		= gAccess.getSize().x() > rAccess.getSize().x();
1671 	const bool				subsampledY		= gAccess.getSize().y() > rAccess.getSize().y();
1672 
1673 	minBounds.resize(sts.size(), Vec4(TCU_INFINITY));
1674 	maxBounds.resize(sts.size(), Vec4(-TCU_INFINITY));
1675 
1676 	uvBounds.resize(sts.size(), Vec4(TCU_INFINITY, -TCU_INFINITY, TCU_INFINITY, -TCU_INFINITY));
1677 	ijBounds.resize(sts.size(), IVec4(0x7FFFFFFF, -1 -0x7FFFFFFF, 0x7FFFFFFF, -1 -0x7FFFFFFF));
1678 
1679 	// Chroma plane sizes must match
1680 	DE_ASSERT(rAccess.getSize() == bAccess.getSize());
1681 
1682 	// Luma plane sizes must match
1683 	DE_ASSERT(gAccess.getSize() == aAccess.getSize());
1684 
1685 	// Luma plane size must match chroma plane or be twice as big
1686 	DE_ASSERT(rAccess.getSize().x() == gAccess.getSize().x() || 2 * rAccess.getSize().x() == gAccess.getSize().x());
1687 	DE_ASSERT(rAccess.getSize().y() == gAccess.getSize().y() || 2 * rAccess.getSize().y() == gAccess.getSize().y());
1688 
1689 	DE_ASSERT(filter == vk::VK_FILTER_NEAREST || filter == vk::VK_FILTER_LINEAR);
1690 	DE_ASSERT(chromaFilter == vk::VK_FILTER_NEAREST || chromaFilter == vk::VK_FILTER_LINEAR);
1691 	DE_ASSERT(subsampledX || !subsampledY);
1692 
1693 
1694 	for (size_t ndx = 0; ndx < sts.size(); ndx++)
1695 	{
1696 		const Vec2	st		(sts[ndx]);
1697 		Interval	bounds[4];
1698 
1699 		const Interval	u	(calculateUV(coordFormat, st[0], gAccess.getSize().x()));
1700 		const Interval	v	(calculateUV(coordFormat, st[1], gAccess.getSize().y()));
1701 
1702 		uvBounds[ndx][0] = (float)u.lo();
1703 		uvBounds[ndx][1] = (float)u.hi();
1704 
1705 		uvBounds[ndx][2] = (float)v.lo();
1706 		uvBounds[ndx][3] = (float)v.hi();
1707 
1708 		const IVec2	iRange	(calculateIJRange(filter, coordFormat, u));
1709 		const IVec2	jRange	(calculateIJRange(filter, coordFormat, v));
1710 
1711 		ijBounds[ndx][0] = iRange[0];
1712 		ijBounds[ndx][1] = iRange[1];
1713 
1714 		ijBounds[ndx][2] = jRange[0];
1715 		ijBounds[ndx][3] = jRange[1];
1716 
1717 		for (int j = jRange.x(); j <= jRange.y(); j++)
1718 		for (int i = iRange.x(); i <= iRange.y(); i++)
1719 		{
1720 			if (filter == vk::VK_FILTER_NEAREST)
1721 			{
1722 				const Interval	gValue	(lookupWrapped(gAccess, conversionFormat[1], addressModeU, addressModeV, IVec2(i, j)));
1723 				const Interval	aValue	(lookupWrapped(aAccess, conversionFormat[3], addressModeU, addressModeV, IVec2(i, j)));
1724 
1725 				if (explicitReconstruction || !(subsampledX || subsampledY))
1726 				{
1727 					Interval rValue, bValue;
1728 					if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1729 					{
1730 						// Reconstruct using nearest if needed, otherwise, just take what's already there.
1731 						const int subI = subsampledX ? i / 2 : i;
1732 						const int subJ = subsampledY ? j / 2 : j;
1733 						rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(subI, subJ));
1734 						bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(subI, subJ));
1735 					}
1736 					else // vk::VK_FILTER_LINEAR
1737 					{
1738 						if (subsampledY)
1739 						{
1740 							rValue = reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i, j);
1741 							bValue = reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i, j);
1742 						}
1743 						else
1744 						{
1745 							rValue = reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i, j);
1746 							bValue = reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i, j);
1747 						}
1748 					}
1749 
1750 					const Interval srcColor[] =
1751 					{
1752 						rValue,
1753 						gValue,
1754 						bValue,
1755 						aValue
1756 					};
1757 					Interval dstColor[4];
1758 
1759 					convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1760 
1761 					for (size_t compNdx = 0; compNdx < 4; compNdx++)
1762 						bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1763 				}
1764 				else
1765 				{
1766 					const Interval	chromaU	(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) : u);
1767 					const Interval	chromaV	(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) : v);
1768 
1769 					// Reconstructed chroma samples with implicit filtering
1770 					const IVec2	chromaIRange	(subsampledX ? calculateIJRange(chromaFilter, coordFormat, chromaU) : IVec2(i, i));
1771 					const IVec2	chromaJRange	(subsampledY ? calculateIJRange(chromaFilter, coordFormat, chromaV) : IVec2(j, j));
1772 
1773 					for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1774 					for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1775 					{
1776 						Interval rValue, bValue;
1777 
1778 						if (chromaFilter == vk::VK_FILTER_NEAREST)
1779 						{
1780 							rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(chromaI, chromaJ));
1781 							bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(chromaI, chromaJ));
1782 						}
1783 						else // vk::VK_FILTER_LINEAR
1784 						{
1785 							const Interval	chromaA	(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1786 							const Interval	chromaB	(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1787 
1788 							rValue = linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU, addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1789 							bValue = linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU, addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1790 						}
1791 
1792 						const Interval	srcColor[]	=
1793 						{
1794 							rValue,
1795 							gValue,
1796 							bValue,
1797 							aValue
1798 						};
1799 
1800 						Interval dstColor[4];
1801 						convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1802 
1803 						for (size_t compNdx = 0; compNdx < 4; compNdx++)
1804 							bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1805 					}
1806 				}
1807 			}
1808 			else // filter == vk::VK_FILTER_LINEAR
1809 			{
1810 				const Interval	lumaA		(calculateAB(subTexelPrecisionBits, u, i));
1811 				const Interval	lumaB		(calculateAB(subTexelPrecisionBits, v, j));
1812 
1813 				const Interval	gValue		(linearSample(gAccess, conversionFormat[1], filteringFormat[1], addressModeU, addressModeV, IVec2(i, j), lumaA, lumaB));
1814 				const Interval	aValue		(linearSample(aAccess, conversionFormat[3], filteringFormat[3], addressModeU, addressModeV, IVec2(i, j), lumaA, lumaB));
1815 
1816 				if (explicitReconstruction || !(subsampledX || subsampledY))
1817 				{
1818 					Interval rValue, bValue;
1819 					if (chromaFilter == vk::VK_FILTER_NEAREST || !subsampledX)
1820 					{
1821 						rValue = linearInterpolate(filteringFormat[0], lumaA, lumaB,
1822 													lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(i       / (subsampledX ? 2 : 1), j       / (subsampledY ? 2 : 1))),
1823 													lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2((i + 1) / (subsampledX ? 2 : 1), j       / (subsampledY ? 2 : 1))),
1824 													lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(i       / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1825 													lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1826 						bValue = linearInterpolate(filteringFormat[2], lumaA, lumaB,
1827 													lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(i       / (subsampledX ? 2 : 1), j       / (subsampledY ? 2 : 1))),
1828 													lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2((i + 1) / (subsampledX ? 2 : 1), j       / (subsampledY ? 2 : 1))),
1829 													lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(i       / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))),
1830 													lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2((i + 1) / (subsampledX ? 2 : 1), (j + 1) / (subsampledY ? 2 : 1))));
1831 					}
1832 					else // vk::VK_FILTER_LINEAR
1833 					{
1834 						if (subsampledY)
1835 						{
1836 							// Linear, reconstructed XY chroma samples with explicit linear filtering
1837 							rValue = linearInterpolate(filteringFormat[0], lumaA, lumaB,
1838 														reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i, j),
1839 														reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i + 1, j),
1840 														reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i , j + 1),
1841 														reconstructLinearXYChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, yChromaOffset, addressModeU, addressModeV, rAccess, i + 1, j + 1));
1842 							bValue = linearInterpolate(filteringFormat[2], lumaA, lumaB,
1843 														reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i, j),
1844 														reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i + 1, j),
1845 														reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i , j + 1),
1846 														reconstructLinearXYChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, yChromaOffset, addressModeU, addressModeV, bAccess, i + 1, j + 1));
1847 						}
1848 						else
1849 						{
1850 							// Linear, Reconstructed x chroma samples with explicit linear filtering
1851 							rValue = linearInterpolate(filteringFormat[0], lumaA, lumaB,
1852 														reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i, j),
1853 														reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i + 1, j),
1854 														reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i , j + 1),
1855 														reconstructLinearXChromaSample(filteringFormat[0], conversionFormat[0], xChromaOffset, addressModeU, addressModeV, rAccess, i + 1, j + 1));
1856 							bValue = linearInterpolate(filteringFormat[2], lumaA, lumaB,
1857 														reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i, j),
1858 														reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i + 1, j),
1859 														reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i , j + 1),
1860 														reconstructLinearXChromaSample(filteringFormat[2], conversionFormat[2], xChromaOffset, addressModeU, addressModeV, bAccess, i + 1, j + 1));
1861 						}
1862 					}
1863 
1864 					const Interval	srcColor[]	=
1865 					{
1866 						rValue,
1867 						gValue,
1868 						bValue,
1869 						aValue
1870 					};
1871 					Interval dstColor[4];
1872 
1873 					convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1874 
1875 					for (size_t compNdx = 0; compNdx < 4; compNdx++)
1876 						bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1877 				}
1878 				else
1879 				{
1880 					const Interval	chromaU	(subsampledX ? calculateImplicitChromaUV(coordFormat, xChromaOffset, u) : u);
1881 					const Interval	chromaV	(subsampledY ? calculateImplicitChromaUV(coordFormat, yChromaOffset, v) : v);
1882 
1883 					// TODO: It looks incorrect to ignore the chroma filter here. Is it?
1884 					const IVec2	chromaIRange	(calculateNearestIJRange(coordFormat, chromaU));
1885 					const IVec2	chromaJRange	(calculateNearestIJRange(coordFormat, chromaV));
1886 
1887 					for (int chromaJ = chromaJRange.x(); chromaJ <= chromaJRange.y(); chromaJ++)
1888 					for (int chromaI = chromaIRange.x(); chromaI <= chromaIRange.y(); chromaI++)
1889 					{
1890 						Interval rValue, bValue;
1891 
1892 						if (chromaFilter == vk::VK_FILTER_NEAREST)
1893 						{
1894 							rValue = lookupWrapped(rAccess, conversionFormat[0], addressModeU, addressModeV, IVec2(chromaI, chromaJ));
1895 							bValue = lookupWrapped(bAccess, conversionFormat[2], addressModeU, addressModeV, IVec2(chromaI, chromaJ));
1896 						}
1897 						else // vk::VK_FILTER_LINEAR
1898 						{
1899 							const Interval	chromaA	(calculateAB(subTexelPrecisionBits, chromaU, chromaI));
1900 							const Interval	chromaB	(calculateAB(subTexelPrecisionBits, chromaV, chromaJ));
1901 
1902 							rValue = linearSample(rAccess, conversionFormat[0], filteringFormat[0], addressModeU, addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1903 							bValue = linearSample(bAccess, conversionFormat[2], filteringFormat[2], addressModeU, addressModeV, IVec2(chromaI, chromaJ), chromaA, chromaB);
1904 						}
1905 
1906 						const Interval	srcColor[]	=
1907 						{
1908 							rValue,
1909 							gValue,
1910 							bValue,
1911 							aValue
1912 						};
1913 						Interval dstColor[4];
1914 						convertColor(colorModel, range, conversionFormat, bitDepth, srcColor, dstColor);
1915 
1916 						for (size_t compNdx = 0; compNdx < 4; compNdx++)
1917 							bounds[compNdx] |= highp.roundOut(dstColor[compNdx], false);
1918 					}
1919 				}
1920 			}
1921 		}
1922 
1923 		minBounds[ndx] = Vec4((float)bounds[0].lo(), (float)bounds[1].lo(), (float)bounds[2].lo(), (float)bounds[3].lo());
1924 		maxBounds[ndx] = Vec4((float)bounds[0].hi(), (float)bounds[1].hi(), (float)bounds[2].hi(), (float)bounds[3].hi());
1925 	}
1926 }
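// In summary, for each coordinate in sts the function accumulates into bounds[] every
// candidate texel footprint (iRange x jRange), the candidate chroma taps for implicit
// reconstruction, and an interval round-out after each arithmetic step, and finally
// reports the conservative per-channel result as minBounds[ndx] / maxBounds[ndx].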
1927 
1928 } // ycbcr
1929 
1930 } // vkt
1931