1 /*
2 * Copyright (c) 2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "render_backend_gles.h"
17
18 #include <algorithm>
19
20 #include <base/containers/fixed_string.h>
21 #include <render/datastore/render_data_store_render_pods.h> // NodeGraphBackbufferConfiguration...
22 #include <render/namespace.h>
23
24 #if (RENDER_PERF_ENABLED == 1)
25 #include <core/perf/cpu_perf_scope.h>
26 #include <core/perf/intf_performance_data_manager.h>
27
28 #include "perf/gpu_query.h"
29 #include "perf/gpu_query_manager.h"
30 #endif
31 #include "device/gpu_resource_manager.h"
32 #include "gles/device_gles.h"
33 #include "gles/gl_functions.h"
34 #include "gles/gpu_buffer_gles.h"
35 #include "gles/gpu_image_gles.h"
36 #include "gles/gpu_program_gles.h"
37 #include "gles/gpu_query_gles.h"
38 #include "gles/gpu_sampler_gles.h"
39 #include "gles/gpu_semaphore_gles.h"
40 #include "gles/node_context_descriptor_set_manager_gles.h"
41 #include "gles/node_context_pool_manager_gles.h"
42 #include "gles/pipeline_state_object_gles.h"
43 #include "gles/render_frame_sync_gles.h"
44 #include "gles/swapchain_gles.h"
45 #include "nodecontext/pipeline_descriptor_set_binder.h"
46 #include "nodecontext/render_command_list.h"
47 #include "nodecontext/render_node_graph_node_store.h" // RenderCommandFrameData
48 #include "util/log.h"
49 #include "util/render_frame_util.h"
50
51 #define IS_BIT(value, bit) ((((value) & (bit)) == (bit)) ? true : false)
52 #define IS_BIT_GL(value, bit) ((((value) & (bit)) == (bit)) ? (GLboolean)GL_TRUE : (GLboolean)GL_FALSE)
53
54 using namespace BASE_NS;
55
56 // NOTE: implement missing commands, add state caching, and clean up a bit more.
57 RENDER_BEGIN_NAMESPACE()
58 namespace Gles {
59 // Indices to colorBlendConstants
60 static constexpr uint32_t RED_INDEX = 0;
61 static constexpr uint32_t GREEN_INDEX = 1;
62 static constexpr uint32_t BLUE_INDEX = 2;
63 static constexpr uint32_t ALPHA_INDEX = 3;
64 static constexpr uint32_t CUBEMAP_LAYERS = 6;
65 } // namespace Gles
66
67 namespace {
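// LAYER_ID maps an array-layer index (0..5) to the matching GL cube-map face enum; the trailing 0
// acts as a sentinel so loops over faces can stop early (see BlitCube below).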
68 constexpr GLenum LAYER_ID[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
69 GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
70 GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
71
72 GLenum GetCubeMapTarget(GLenum type, uint32_t layer)
73 {
74 if (type == GL_TEXTURE_CUBE_MAP) {
75 PLUGIN_ASSERT_MSG(layer < Gles::CUBEMAP_LAYERS, "Invalid cubemap index %u", layer);
76 return LAYER_ID[layer];
77 }
78 PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
79 return GL_NONE;
80 }
81
82 GLenum GetTarget(GLenum type, uint32_t layer, uint32_t sampleCount)
83 {
84 if (type == GL_TEXTURE_2D) {
85 if (sampleCount > 1) {
86 return GL_TEXTURE_2D_MULTISAMPLE;
87 }
88 return GL_TEXTURE_2D;
89 }
90 if (type == GL_TEXTURE_CUBE_MAP) {
91 PLUGIN_ASSERT_MSG(sampleCount == 1, "Cubemap texture can't have MSAA");
92 return GetCubeMapTarget(type, layer);
93 }
94 PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
95 return GL_NONE;
96 }
97
98 struct BlitArgs {
99 uint32_t mipLevel {};
100 Size3D rect0 {};
101 Size3D rect1 {};
102 uint32_t height {};
103 };
104
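// Converts the engine's top-left-origin rectangles to GL's bottom-left origin before blitting:
// yGL = (attachmentHeight >> mipLevel) - (y + height). For example, a 4 px tall source rect at
// y = 2 in a 16 px tall mip 0 attachment starts at yGL = 16 - (2 + 4) = 10.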
105 void DoBlit(const Filter filter, const BlitArgs& src, const BlitArgs& dst)
106 {
107 // Handle top-left / bottom-left origin conversion
108 auto sy = static_cast<GLint>(src.rect0.height);
109 const auto sh = static_cast<const GLint>(src.rect1.height);
110 const auto sfh = static_cast<GLint>(src.height >> src.mipLevel);
111 sy = sfh - (sy + sh);
112 auto dy = static_cast<GLint>(dst.rect0.height);
113 const auto dh = static_cast<const GLint>(dst.rect1.height);
114 const auto dfh = static_cast<GLint>(dst.height >> dst.mipLevel);
115 dy = dfh - (dy + dh);
116 GLenum glfilter = GL_NEAREST;
117 if (filter == CORE_FILTER_NEAREST) {
118 glfilter = GL_NEAREST;
119 } else if (filter == CORE_FILTER_LINEAR) {
120 glfilter = GL_LINEAR;
121 } else {
122 PLUGIN_ASSERT_MSG(false, "RenderCommandBlitImage Invalid filter mode");
123 }
124 glBlitFramebuffer(static_cast<GLint>(src.rect0.width), sy, static_cast<GLint>(src.rect1.width), sfh,
125 static_cast<GLint>(dst.rect0.width), dy, static_cast<GLint>(dst.rect1.width), dfh, GL_COLOR_BUFFER_BIT,
126 glfilter);
127 }
128
129 GLenum GetPrimFromTopology(PrimitiveTopology op)
130 {
131 switch (op) {
132 case CORE_PRIMITIVE_TOPOLOGY_POINT_LIST:
133 return GL_POINTS;
134 case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST:
135 return GL_LINES;
136 case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP:
137 return GL_LINE_STRIP;
138 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
139 return GL_TRIANGLES;
140 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
141 return GL_TRIANGLE_STRIP;
142 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
143 return GL_TRIANGLE_FAN;
144 #if defined(GL_ES_VERSION_3_2) || defined(GL_VERSION_3_2)
145 // The following are valid on GL(ES) 3.2 and newer
146 case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
147 return GL_LINES_ADJACENCY;
148 case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
149 return GL_LINE_STRIP_ADJACENCY;
150 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
151 return GL_TRIANGLES_ADJACENCY;
152 case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
153 return GL_TRIANGLE_STRIP_ADJACENCY;
154 case CORE_PRIMITIVE_TOPOLOGY_PATCH_LIST:
155 return GL_PATCHES;
156 #endif
157 default:
158 PLUGIN_ASSERT_MSG(false, "Unsupported primitive topology");
159 break;
160 }
161 return GL_POINTS;
162 }
163
164 GLenum GetBlendOp(BlendOp func)
165 {
166 switch (func) {
167 case CORE_BLEND_OP_ADD:
168 return GL_FUNC_ADD;
169 case CORE_BLEND_OP_SUBTRACT:
170 return GL_FUNC_SUBTRACT;
171 case CORE_BLEND_OP_REVERSE_SUBTRACT:
172 return GL_FUNC_REVERSE_SUBTRACT;
173 case CORE_BLEND_OP_MIN:
174 return GL_MIN;
175 case CORE_BLEND_OP_MAX:
176 return GL_MAX;
177 default:
178 break;
179 }
180 return GL_FUNC_ADD;
181 }
182
183 GLenum GetBlendFactor(BlendFactor factor)
184 {
185 switch (factor) {
186 case CORE_BLEND_FACTOR_ZERO:
187 return GL_ZERO;
188 case CORE_BLEND_FACTOR_ONE:
189 return GL_ONE;
190 case CORE_BLEND_FACTOR_SRC_COLOR:
191 return GL_SRC_COLOR;
192 case CORE_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
193 return GL_ONE_MINUS_SRC_COLOR;
194 case CORE_BLEND_FACTOR_DST_COLOR:
195 return GL_DST_COLOR;
196 case CORE_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
197 return GL_ONE_MINUS_DST_COLOR;
198 case CORE_BLEND_FACTOR_SRC_ALPHA:
199 return GL_SRC_ALPHA;
200 case CORE_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
201 return GL_ONE_MINUS_SRC_ALPHA;
202 case CORE_BLEND_FACTOR_DST_ALPHA:
203 return GL_DST_ALPHA;
204 case CORE_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
205 return GL_ONE_MINUS_DST_ALPHA;
206 case CORE_BLEND_FACTOR_CONSTANT_COLOR:
207 return GL_CONSTANT_COLOR;
208 case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
209 return GL_ONE_MINUS_CONSTANT_COLOR;
210 case CORE_BLEND_FACTOR_CONSTANT_ALPHA:
211 return GL_CONSTANT_ALPHA;
212 case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
213 return GL_ONE_MINUS_CONSTANT_ALPHA;
214 case CORE_BLEND_FACTOR_SRC_ALPHA_SATURATE:
215 return GL_SRC_ALPHA_SATURATE;
216 // NOTE: check the GLES3.2...
217 /* following requires EXT_blend_func_extended (dual source blending) */
218 case CORE_BLEND_FACTOR_SRC1_COLOR:
219 case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
220 case CORE_BLEND_FACTOR_SRC1_ALPHA:
221 case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
222 default:
223 break;
224 }
225 return GL_ONE;
226 }
227
228 GLenum GetCompareOp(CompareOp aOp)
229 {
230 switch (aOp) {
231 case CORE_COMPARE_OP_NEVER:
232 return GL_NEVER;
233 case CORE_COMPARE_OP_LESS:
234 return GL_LESS;
235 case CORE_COMPARE_OP_EQUAL:
236 return GL_EQUAL;
237 case CORE_COMPARE_OP_LESS_OR_EQUAL:
238 return GL_LEQUAL;
239 case CORE_COMPARE_OP_GREATER:
240 return GL_GREATER;
241 case CORE_COMPARE_OP_NOT_EQUAL:
242 return GL_NOTEQUAL;
243 case CORE_COMPARE_OP_GREATER_OR_EQUAL:
244 return GL_GEQUAL;
245 case CORE_COMPARE_OP_ALWAYS:
246 return GL_ALWAYS;
247 default:
248 break;
249 }
250 return GL_ALWAYS;
251 }
252
253 GLenum GetStencilOp(StencilOp aOp)
254 {
255 switch (aOp) {
256 case CORE_STENCIL_OP_KEEP:
257 return GL_KEEP;
258 case CORE_STENCIL_OP_ZERO:
259 return GL_ZERO;
260 case CORE_STENCIL_OP_REPLACE:
261 return GL_REPLACE;
262 case CORE_STENCIL_OP_INCREMENT_AND_CLAMP:
263 return GL_INCR;
264 case CORE_STENCIL_OP_DECREMENT_AND_CLAMP:
265 return GL_DECR;
266 case CORE_STENCIL_OP_INVERT:
267 return GL_INVERT;
268 case CORE_STENCIL_OP_INCREMENT_AND_WRAP:
269 return GL_INCR_WRAP;
270 case CORE_STENCIL_OP_DECREMENT_AND_WRAP:
271 return GL_DECR_WRAP;
272 default:
273 break;
274 }
275 return GL_KEEP;
276 }
277
278 void SetState(GLenum type, bool enabled)
279 {
280 if (enabled) {
281 glEnable(type);
282 } else {
283 glDisable(type);
284 }
285 }
286
287 void SetCullMode(const GraphicsState::RasterizationState& rs)
288 {
289 SetState(GL_CULL_FACE, (rs.cullModeFlags != CORE_CULL_MODE_NONE));
290
291 switch (rs.cullModeFlags) {
292 case CORE_CULL_MODE_FRONT_BIT:
293 glCullFace(GL_FRONT);
294 break;
295 case CORE_CULL_MODE_BACK_BIT:
296 glCullFace(GL_BACK);
297 break;
298 case CORE_CULL_MODE_FRONT_AND_BACK:
299 glCullFace(GL_FRONT_AND_BACK);
300 break;
301 case CORE_CULL_MODE_NONE:
302 default:
303 break;
304 }
305 }
306
307 void SetFrontFace(const GraphicsState::RasterizationState& rs)
308 {
309 switch (rs.frontFace) {
310 case CORE_FRONT_FACE_COUNTER_CLOCKWISE:
311 glFrontFace(GL_CCW);
312 break;
313 case CORE_FRONT_FACE_CLOCKWISE:
314 glFrontFace(GL_CW);
315 break;
316 default:
317 break;
318 }
319 }
320
321 #if RENDER_HAS_GL_BACKEND
322 void SetPolygonMode(const GraphicsState::RasterizationState& rs)
323 {
324 GLenum mode;
325 switch (rs.polygonMode) {
326 default:
327 case CORE_POLYGON_MODE_FILL:
328 mode = GL_FILL;
329 break;
330 case CORE_POLYGON_MODE_LINE:
331 mode = GL_LINE;
332 break;
333 case CORE_POLYGON_MODE_POINT:
334 mode = GL_POINT;
335 break;
336 }
337 glPolygonMode(GL_FRONT_AND_BACK, mode);
338 }
339 #endif
340
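// Invalidates render pass attachments after use: the whole attachment when the render area covers
// it completely, otherwise only the render-area rectangle via glInvalidateSubFramebuffer.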
341 void Invalidate(GLenum framebuffer, int32_t count, const GLenum invalidate[], const RenderPassDesc& rpd,
342 const LowlevelFramebufferGL& frameBuffer)
343 {
344 if (count > 0) {
345 if ((frameBuffer.width == rpd.renderArea.extentWidth) && (frameBuffer.height == rpd.renderArea.extentHeight)) {
346 // Invalidate the whole buffer. (attachment sizes match render area)
347 glInvalidateFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate);
348 } else {
349 // invalidate only a part of the render target..
350 // NOTE: verify that this works, we might need to flip the Y axis the same way as scissors etc.
351 const auto X = static_cast<const GLint>(rpd.renderArea.offsetX);
352 const auto Y = static_cast<const GLint>(rpd.renderArea.offsetY);
353 const auto W = static_cast<const GLsizei>(rpd.renderArea.extentWidth);
354 const auto H = static_cast<const GLsizei>(rpd.renderArea.extentHeight);
355 glInvalidateSubFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate, X, Y, W, H);
356 }
357 }
358 }
359
360 struct BlitData {
361 const GpuImagePlatformDataGL& iPlat;
362 const GpuImageDesc& imageDesc;
363 const BufferImageCopy& bufferImageCopy;
364 uintptr_t data { 0 };
365 uint64_t size { 0 };
366 uint64_t sizeOfData { 0 };
367 bool compressed { false };
368 };
369
370 void BlitArray(DeviceGLES& device_, const BlitData& bd)
371 {
372 const auto& iPlat = bd.iPlat;
373 const auto& bufferImageCopy = bd.bufferImageCopy;
374 const auto& imageSubresource = bufferImageCopy.imageSubresource;
375 const auto& imageDesc = bd.imageDesc;
376 const uint32_t mip = imageSubresource.mipLevel;
377 const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
378 // NOTE: image offset depth is ignored
379 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
380 const Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
381 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height),
382 Math::min(imageSize.z, bufferImageCopy.imageExtent.depth) };
383 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
384 if (valid) {
385 uintptr_t data = bd.data;
386 const uint32_t layerCount = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
387 for (uint32_t layer = imageSubresource.baseArrayLayer; layer < layerCount; layer++) {
388 const Math::UVec3 offset3D { offset.x, offset.y, layer };
389 if (bd.compressed) {
390 device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
391 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
392 } else {
393 device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
394 iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
395 }
396 data += static_cast<ptrdiff_t>(bd.sizeOfData);
397 }
398 }
399 }
400
401 void Blit2D(DeviceGLES& device_, const BlitData& bd)
402 {
403 const auto& iPlat = bd.iPlat;
404 const auto& bufferImageCopy = bd.bufferImageCopy;
405 const auto& imageSubresource = bufferImageCopy.imageSubresource;
406 const auto& imageDesc = bd.imageDesc;
407 const uint32_t mip = imageSubresource.mipLevel;
408 const Math::UVec2 imageSize { imageDesc.width >> mip, imageDesc.height >> mip };
409 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
410 const Math::UVec2 extent { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
411 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height) };
412 PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == 1,
413 "RenderCommandCopyBufferImage Texture2D with baseArrayLayer!=0 && layerCount!= 1");
414 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
415 const uintptr_t data = bd.data;
416 if (valid && bd.compressed) {
417 device_.CompressedTexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent,
418 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
419 } else if (valid) {
420 device_.TexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent, iPlat.format,
421 iPlat.dataType, reinterpret_cast<const void*>(data));
422 }
423 }
424
425 void Blit3D(DeviceGLES& device_, const BlitData& bd)
426 {
427 const auto& iPlat = bd.iPlat;
428 const auto& bufferImageCopy = bd.bufferImageCopy;
429 const auto& imageSubresource = bufferImageCopy.imageSubresource;
430 const auto& imageDesc = bd.imageDesc;
431 const uint32_t mip = imageSubresource.mipLevel;
432 const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth >> mip };
433 const Math::UVec3 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height,
434 bufferImageCopy.imageOffset.depth };
435 Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
436 Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height), Math::min(imageSize.z - offset.z, 1U) };
437 const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
438 if (valid) {
439 uintptr_t data = bd.data;
440 for (uint32_t slice = 0U; slice < imageSize.z; ++slice) {
441 const Math::UVec3 offset3D { offset.x, offset.y, slice };
442 if (bd.compressed) {
443 device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
444 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
445 } else {
446 device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
447 iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
448 }
449 // offsets one slice
450 data += static_cast<uintptr_t>(bd.sizeOfData);
451 }
452 }
453 }
454
455 void BlitCube(DeviceGLES& device_, const BlitData& bd)
456 {
457 const auto& iPlat = bd.iPlat;
458 const auto& bufferImageCopy = bd.bufferImageCopy;
459 const auto& imageSubresource = bufferImageCopy.imageSubresource;
460 const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
461 const Math::UVec2 extent { bufferImageCopy.imageExtent.width, bufferImageCopy.imageExtent.height };
462 constexpr GLenum faceId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
463 GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
464 GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
465 PLUGIN_UNUSED(Gles::CUBEMAP_LAYERS);
466 PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == Gles::CUBEMAP_LAYERS,
467 "RenderCommandCopyBufferImage Cubemap with baseArrayLayer!=0 && layerCount!= 6");
468 uintptr_t data = bd.data;
469 const uint32_t lastLayer = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
470 for (uint32_t i = imageSubresource.baseArrayLayer; i < lastLayer; i++) {
471 const GLenum face = faceId[i]; // convert layer index to cube map face id.
472 if (face == 0) {
473 // reached the end of cubemap faces (see faceId)
474 // so must stop copying.
475 break;
476 }
477 if (bd.compressed) {
478 device_.CompressedTexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent,
479 iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
480 } else {
481 device_.TexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent, iPlat.format,
482 iPlat.dataType, reinterpret_cast<const void*>(data));
483 }
484 data += static_cast<uintptr_t>(bd.sizeOfData);
485 }
486 }
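// SetupBlit computes the per-layer / per-slice upload size and unpack state for a buffer-to-image
// copy. For compressed formats the size is rounded up to whole blocks: e.g. (illustrative numbers
// only, assuming a 4x4 block format with 16 bytes per block) a 37x18 region needs
// ceil(37/4) * ceil(18/4) = 10 * 5 = 50 blocks, i.e. 800 bytes per layer.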
487 template<bool usePixelUnpackBuffer>
488
489 BlitData SetupBlit(DeviceGLES& device_, const BufferImageCopy& bufferImageCopy, GpuBufferGLES& srcGpuBuffer,
490 const GpuImageGLES& dstGpuImage)
491 {
492 const auto& iPlat = dstGpuImage.GetPlatformData();
493 const auto& imageOffset = bufferImageCopy.imageOffset;
494 PLUGIN_UNUSED(imageOffset);
495 const auto& imageExtent = bufferImageCopy.imageExtent;
496 auto width = (!bufferImageCopy.bufferImageHeight || bufferImageCopy.bufferRowLength)
497 ? bufferImageCopy.imageExtent.width
498 : bufferImageCopy.bufferRowLength;
499 auto height = (!bufferImageCopy.bufferImageHeight || bufferImageCopy.bufferRowLength)
500 ? bufferImageCopy.imageExtent.height
501 : bufferImageCopy.bufferImageHeight;
502 // size is calculated for single layer / slice
503 const uint64_t size =
504 static_cast<uint64_t>(iPlat.bytesperpixel) * static_cast<uint64_t>(width) * static_cast<uint64_t>(height);
505 uintptr_t data = bufferImageCopy.bufferOffset;
506 if constexpr (usePixelUnpackBuffer) {
507 const auto& plat = srcGpuBuffer.GetPlatformData();
508 device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, plat.buffer);
509 } else {
510 // Use the mapped pointer for glTexSubImage2D; this is a workaround for GL_INVALID_OPERATION on the PVR GLES
511 // simulator and a crash with ETC2 textures on NVIDIA.
512 data += reinterpret_cast<uintptr_t>(srcGpuBuffer.Map());
513 }
514 uint64_t sizeOfData = size;
515 const auto& compinfo = iPlat.compression;
516 if (compinfo.compressed) {
517 // how many blocks in width
518 const int64_t blockW = (imageExtent.width + (compinfo.blockW - 1)) / compinfo.blockW;
519 // how many blocks in height
520 const int64_t blockH = (imageExtent.height + (compinfo.blockH - 1)) / compinfo.blockH;
521 // size in bytes..
522 sizeOfData = static_cast<uint64_t>(((blockW * blockH) * compinfo.bytesperblock));
523
524 // Warn for partial copies. we do not handle those at the moment.
525 if (bufferImageCopy.bufferRowLength != 0) {
526 if (bufferImageCopy.bufferRowLength != blockW * compinfo.blockW) {
527 PLUGIN_LOG_W("Partial copies of compressed texture data are not currently supported. "
528 "Stride must match image width (with block align). "
529 "bufferImageCopy.bufferRowLength(%d) "
530 "imageExtent.width(%d) ",
531 bufferImageCopy.bufferRowLength, imageExtent.width);
532 }
533 }
534 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
535 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
536 } else {
537 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bufferImageCopy.bufferRowLength));
538 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, static_cast<GLint>(bufferImageCopy.bufferImageHeight));
539 }
540 glPixelStorei(GL_UNPACK_ALIGNMENT, 1); // Make sure the align is tight.
541 return { iPlat, dstGpuImage.GetDesc(), bufferImageCopy, data, size, sizeOfData, compinfo.compressed };
542 }
543
544 template<bool usePixelUnpackBuffer>
545 void FinishBlit(DeviceGLES& device_, const GpuBufferGLES& srcGpuBuffer)
546 {
547 if constexpr (usePixelUnpackBuffer) {
548 device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
549 } else {
550 srcGpuBuffer.Unmap();
551 }
552 }
553
554 template<typename T, size_t N>
555 constexpr bool Compare(const T (&a)[N], const T (&b)[N])
556 {
557 for (size_t i = 0; i < N; i++) {
558 if (a[i] != b[i])
559 return false;
560 }
561 return true;
562 }
563
564 template<typename T, size_t N>
565
566 constexpr bool Set(T (&a)[N], const T (&b)[N])
567 {
568 for (size_t i = 0; i < N; i++) {
569 a[i] = b[i];
570 }
571 return true;
572 }
573
574 bool CompareBlendFactors(
575 const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
576 {
577 return (a.srcColorBlendFactor == b.srcColorBlendFactor) && (a.srcAlphaBlendFactor == b.srcAlphaBlendFactor) &&
578 (a.dstColorBlendFactor == b.dstColorBlendFactor) && (a.dstAlphaBlendFactor == b.dstAlphaBlendFactor);
579 }
580
581 void SetBlendFactors(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
582 {
583 a.srcColorBlendFactor = b.srcColorBlendFactor;
584 a.srcAlphaBlendFactor = b.srcAlphaBlendFactor;
585 a.dstColorBlendFactor = b.dstColorBlendFactor;
586 a.dstAlphaBlendFactor = b.dstAlphaBlendFactor;
587 }
588
589 bool CompareBlendOps(
590 const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
591 {
592 return (a.colorBlendOp == b.colorBlendOp) && (a.alphaBlendOp == b.alphaBlendOp);
593 }
594
595 void SetBlendOps(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
596 {
597 a.colorBlendOp = b.colorBlendOp;
598 a.alphaBlendOp = b.alphaBlendOp;
599 }
600
601 bool CompareStencilOp(const GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
602 {
603 return (a.failOp == b.failOp) && (a.depthFailOp == b.depthFailOp) && (a.passOp == b.passOp);
604 }
605
606 void SetStencilOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
607 {
608 a.failOp = b.failOp;
609 a.depthFailOp = b.depthFailOp;
610 a.passOp = b.passOp;
611 }
612
613 void SetStencilCompareOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
614 {
615 a.compareOp = b.compareOp;
616 a.compareMask = b.compareMask;
617 a.reference = b.reference;
618 }
619
620 #if RENDER_VALIDATION_ENABLED
621 void ValidateCopyImage(const Offset3D& offset, const Size3D& extent, uint32_t mipLevel, const GpuImageDesc& imageDesc)
622 {
623 if (mipLevel >= imageDesc.mipCount) {
624 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage mipLevel must be less than image mipCount.");
625 }
626 if ((offset.x < 0) || (offset.y < 0) || (offset.z < 0)) {
627 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset must not be negative.");
628 }
629 if (((offset.x + extent.width) > imageDesc.width) || ((offset.y + extent.height) > imageDesc.height) ||
630 ((offset.z + extent.depth) > imageDesc.depth)) {
631 PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset + extent does not fit in image.");
632 }
633 }
634
635 void ValidateCopyImage(const ImageCopy& imageCopy, const GpuImageDesc& srcImageDesc, const GpuImageDesc& dstImageDesc)
636 {
637 ValidateCopyImage(imageCopy.srcOffset, imageCopy.extent, imageCopy.srcSubresource.mipLevel, srcImageDesc);
638 ValidateCopyImage(imageCopy.dstOffset, imageCopy.extent, imageCopy.dstSubresource.mipLevel, dstImageDesc);
639 }
640 #endif
641
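// ClampOffset shifts a copy with a negative source offset back into range while preserving the
// destination mapping: for srcOffset = -3 and size = 10, the size shrinks to 7, dstOffset grows by
// 3, and srcOffset becomes 0.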
642 constexpr void ClampOffset(int32_t& srcOffset, int32_t& dstOffset, uint32_t& size)
643 {
644 if (srcOffset < 0) {
645 auto iSize = static_cast<int32_t>(size);
646 size = static_cast<uint32_t>(iSize + srcOffset);
647 dstOffset -= srcOffset;
648 srcOffset = 0;
649 }
650 }
651
652 constexpr void ClampOffset(Offset3D& srcOffset, Offset3D& dstOffset, Size3D& size)
653 {
654 ClampOffset(srcOffset.x, dstOffset.x, size.width);
655 ClampOffset(srcOffset.y, dstOffset.y, size.height);
656 ClampOffset(srcOffset.z, dstOffset.z, size.depth);
657 }
658
659 constexpr void ClampSize(int32_t offset, uint32_t maxSize, uint32_t& size)
660 {
661 if (size > static_cast<uint32_t>(static_cast<int32_t>(maxSize) - offset)) {
662 size = static_cast<uint32_t>(static_cast<int32_t>(maxSize) - offset);
663 }
664 }
665
666 constexpr void ClampSize(const Offset3D& offset, const GpuImageDesc& desc, Size3D& size)
667 {
668 ClampSize(offset.x, desc.width, size.width);
669 ClampSize(offset.y, desc.height, size.height);
670 ClampSize(offset.z, desc.depth, size.depth);
671 }
672
673 // helper which covers barriers supported by Barrier and BarrierByRegion
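// e.g. CORE_ACCESS_SHADER_READ_BIT on a GPU_IMAGE maps to
// GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT, while the same flag on a
// GPU_BUFFER maps to GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT.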
674 constexpr GLbitfield CommonBarrierBits(AccessFlags accessFlags, RenderHandleType resourceType)
675 {
676 GLbitfield barriers = 0;
677 if (accessFlags & CORE_ACCESS_UNIFORM_READ_BIT) {
678 barriers |= GL_UNIFORM_BARRIER_BIT;
679 }
680 if (accessFlags & CORE_ACCESS_SHADER_READ_BIT) {
681 // shader read covers UBO, SSBO, storage image etc. use resource type to limit the options.
682 if (resourceType == RenderHandleType::GPU_IMAGE) {
683 barriers |= GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
684 } else if (resourceType == RenderHandleType::GPU_BUFFER) {
685 barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
686 } else {
687 barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
688 GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
689 }
690 }
691 if (accessFlags & CORE_ACCESS_SHADER_WRITE_BIT) {
692 if (resourceType == RenderHandleType::GPU_IMAGE) {
693 barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
694 } else if (resourceType == RenderHandleType::GPU_BUFFER) {
695 barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
696 } else {
697 barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
698 }
699 }
700 if (accessFlags & (CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT |
701 CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
702 barriers |= GL_FRAMEBUFFER_BARRIER_BIT;
703 }
704 // GL_ATOMIC_COUNTER_BARRIER_BIT is not used at the moment
705 return barriers;
706 }
707 } // namespace
708
709 RenderBackendGLES::RenderBackendGLES(Device& device, GpuResourceManager& gpuResourceManager)
710 : RenderBackend(), device_(static_cast<DeviceGLES&>(device)), gpuResourceMgr_(gpuResourceManager)
711 {
712 #if (RENDER_PERF_ENABLED == 1)
713 validGpuQueries_ = false;
714 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
715 gpuQueryMgr_ = make_unique<GpuQueryManager>();
716 #if RENDER_HAS_GL_BACKEND
717 if (device_.GetBackendType() == DeviceBackendType::OPENGL) {
718 validGpuQueries_ = true;
719 }
720 #endif
721 #if RENDER_HAS_GLES_BACKEND
722 if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
723 // Check if GL_EXT_disjoint_timer_query is available.
724 validGpuQueries_ = device_.HasExtension("GL_EXT_disjoint_timer_query");
725 }
726 #endif
727 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
728 #endif // RENDER_PERF_ENABLED
729 #if RENDER_HAS_GLES_BACKEND
730 if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
731 multisampledRenderToTexture_ = device_.HasExtension("GL_EXT_multisampled_render_to_texture2");
732 }
733 #endif
734 PLUGIN_ASSERT(device_.IsActive());
735 PrimeCache(GraphicsState {}); // Initializes cache.
736 glGenFramebuffers(1, &blitImageSourceFbo_);
737 glGenFramebuffers(1, &blitImageDestinationFbo_);
738 #if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
739 PLUGIN_LOG_D("fbo id >: %u", blitImageSourceFbo_);
740 PLUGIN_LOG_D("fbo id >: %u", blitImageDestinationFbo_);
741 #endif
742 #if !RENDER_HAS_GLES_BACKEND
743 glEnable(GL_PROGRAM_POINT_SIZE);
744 #endif
745 }
746
747 RenderBackendGLES::~RenderBackendGLES()
748 {
749 PLUGIN_ASSERT(device_.IsActive());
750 device_.DeleteFrameBuffer(blitImageSourceFbo_);
751 device_.DeleteFrameBuffer(blitImageDestinationFbo_);
752 }
753
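// Presents every swapchain in the back-buffer configuration. When RENDER_GL_FLIP_Y_SWAPCHAIN is
// enabled, the frame is first blitted (Y-flipped) from the swapchain FBO to the context's default
// framebuffer before SwapBuffers is called.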
754 void RenderBackendGLES::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
755 {
756 if (!backBufferConfig.swapchainData.empty()) {
757 if (device_.HasSwapchain()) {
758 #if (RENDER_PERF_ENABLED == 1)
759 commonCpuTimers_.present.Begin();
760 #endif
761 for (const auto& swapchainData : backBufferConfig.swapchainData) {
762 #if (RENDER_DEV_ENABLED == 1)
763 if (swapchainData.config.gpuSemaphoreHandle) {
764 // NOTE: not implemented
765 PLUGIN_LOG_E("NodeGraphBackBufferConfiguration semaphore not signaled");
766 }
767 #endif
768 const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapchainData.handle));
769 if (swp) {
770 #if RENDER_GL_FLIP_Y_SWAPCHAIN
771 // Blit and flip our swapchain frame to backbuffer..
772 const auto& sdesc = swp->GetDesc();
773 if (scissorEnabled_) {
774 glDisable(GL_SCISSOR_TEST);
775 scissorEnabled_ = false;
776 }
777 const auto& platSwapchain = swp->GetPlatformData();
778 device_.BindReadFrameBuffer(platSwapchain.fbos[presentationInfo_.swapchainImageIndex]);
779 device_.BindWriteFrameBuffer(0); // FBO 0 is the surface bound to current context..
780 glBlitFramebuffer(0, 0, (GLint)sdesc.width, (GLint)sdesc.height, 0, (GLint)sdesc.height,
781 (GLint)sdesc.width, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST);
782 device_.BindReadFrameBuffer(0);
783 #endif
784 device_.SwapBuffers(*swp);
785 }
786 }
787 #if (RENDER_PERF_ENABLED == 1)
788 commonCpuTimers_.present.End();
789 #endif
790 }
791 }
792 }
793
794 void RenderBackendGLES::ResetState()
795 {
796 boundProgram_ = {};
797 boundIndexBuffer_ = {};
798 vertexAttribBinds_ = 0;
799 renderingToDefaultFbo_ = false;
800 boundComputePipeline_ = nullptr;
801 boundGraphicsPipeline_ = nullptr;
802 currentPsoHandle_ = {};
803 renderArea_ = {};
804 activeRenderPass_ = {};
805 currentSubPass_ = 0;
806 currentFrameBuffer_ = nullptr;
807 inRenderpass_ = 0;
808 descriptorUpdate_ = false;
809 vertexBufferUpdate_ = false;
810 indexBufferUpdate_ = false;
811 }
812
813 void RenderBackendGLES::ResetBindings()
814 {
815 boundComputePipeline_ = nullptr;
816 boundGraphicsPipeline_ = nullptr;
817 currentPsoHandle_ = {};
818 }
819
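// Top-level backend entry point for a frame: acquires the next swapchain image(s) and remaps them
// to the corresponding backbuffer handles, updates global descriptor sets, then replays every
// recorded command list, resetting cached bindings between lists.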
820 void RenderBackendGLES::Render(
821 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
822 {
823 // NOTE: all command lists are validated before entering here
824 PLUGIN_ASSERT(device_.IsActive());
825 #if (RENDER_PERF_ENABLED == 1)
826 commonCpuTimers_.full.Begin();
827 commonCpuTimers_.acquire.Begin();
828 #endif
829 presentationInfo_ = {};
830
831 if (device_.HasSwapchain() && (!backBufferConfig.swapchainData.empty())) {
832 for (const auto& swapData : backBufferConfig.swapchainData) {
833 if (const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapData.handle))) {
834 presentationInfo_.swapchainImageIndex = swp->GetNextImage();
835 const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
836 if (presentationInfo_.swapchainImageIndex < swapchainData.imageViewCount) {
837 // remap image to backbuffer
838 const RenderHandle currentSwapchainHandle =
839 swapchainData.imageViews[presentationInfo_.swapchainImageIndex];
840 // special swapchain remapping
841 gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(swapData.handle, currentSwapchainHandle);
842 }
843 }
844 }
845 }
846 #if (RENDER_PERF_ENABLED == 1)
847 commonCpuTimers_.acquire.End();
848
849 StartFrameTimers(renderCommandFrameData);
850 commonCpuTimers_.execute.Begin();
851 #endif
852 // global begin backend frame
853 auto& descriptorSetMgr = (DescriptorSetManagerGles&)device_.GetDescriptorSetManager();
854 descriptorSetMgr.BeginBackendFrame();
855
856 // Reset bindings.
857 ResetState();
858
859 // Update global descset if needed
860 UpdateGlobalDescriptorSets();
861
862 for (const auto& ref : renderCommandFrameData.renderCommandContexts) {
863 // Reset bindings between command lists..
864 ResetBindings();
865 RenderSingleCommandList(ref);
866 }
867 #if (RENDER_PERF_ENABLED == 1)
868 commonCpuTimers_.execute.End();
869 #endif
870 RenderProcessEndCommandLists(renderCommandFrameData, backBufferConfig);
871 #if (RENDER_PERF_ENABLED == 1)
872 commonCpuTimers_.full.End();
873 EndFrameTimers();
874 #endif
875 }
876
877 void RenderBackendGLES::RenderProcessEndCommandLists(
878 RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
879 {
880 if (auto* frameSync = static_cast<RenderFrameSyncGLES*>(renderCommandFrameData.renderFrameSync); frameSync) {
881 frameSync->GetFrameFence();
882 }
883 // signal external GPU fences
884 if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
885 auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
886 const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
887 PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
888 if (externalSignals.size() == externalSemaphores.size()) {
889 for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
890 // needs to be false
891 if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
892 if (const auto* gs = (const GpuSemaphoreGles*)externalSemaphores[sigIdx].get(); gs) {
893 auto& plat = const_cast<GpuSemaphorePlatformDataGles&>(gs->GetPlatformData());
894 // NOTE: currently could create only one GPU sync
895 GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
896 plat.sync = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sync));
897 externalSignals[sigIdx].gpuSignalResourceHandle = plat.sync;
898 externalSignals[sigIdx].signaled = true;
899
900 // NOTE: client is expected to add code for the wait with glClientWaitSync(sync, X, 0)
901 }
902 }
903 }
904 }
905 }
906 }
907
908 void RenderBackendGLES::RenderCommandUndefined(const RenderCommandWithType& renderCommand)
909 {
910 PLUGIN_ASSERT_MSG(false, "non-valid render command");
911 }
912
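// Replays one recorded command list. Each RenderCommandWithType is dispatched through the
// COMMAND_HANDLERS table indexed by its command type; GPU timer queries and debug groups wrap the
// whole list when the corresponding build flags are enabled.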
913 void RenderBackendGLES::RenderSingleCommandList(const RenderCommandContext& renderCommandCtx)
914 {
915 // these are validated in render graph
916 managers_ = { renderCommandCtx.nodeContextPsoMgr, renderCommandCtx.nodeContextPoolMgr,
917 renderCommandCtx.nodeContextDescriptorSetMgr, renderCommandCtx.renderBarrierList };
918
919 managers_.poolMgr->BeginBackendFrame();
920 managers_.psoMgr->BeginBackendFrame();
921
922 // update cmd list context descriptor sets
923 UpdateCommandListDescriptorSets(*renderCommandCtx.renderCommandList, *renderCommandCtx.nodeContextDescriptorSetMgr);
924
925 #if (RENDER_PERF_ENABLED == 1) || (RENDER_DEBUG_MARKERS_ENABLED == 1)
926 const auto& debugName = renderCommandCtx.debugName;
927 #endif
928 #if (RENDER_PERF_ENABLED == 1)
929 perfCounters_ = {};
930 PLUGIN_ASSERT(timers_.count(debugName) == 1);
931 PerfDataSet& perfDataSet = timers_[debugName];
932 perfDataSet.cpuTimer.Begin();
933 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
934 if (validGpuQueries_) {
935 #ifdef GL_GPU_DISJOINT_EXT
936 /* Clear disjoint error */
937 GLint disjointOccurred = 0;
938 glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
939 #endif
940 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
941 PLUGIN_ASSERT(gpuQuery);
942
943 const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
944 PLUGIN_ASSERT(platData.queryObject);
945 glBeginQuery(GL_TIME_ELAPSED_EXT, platData.queryObject);
946 }
947 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
948 #endif // RENDER_PERF_ENABLED
949 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
950 glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)debugName.data());
951 #endif
952 commandListValid_ = true;
953 for (const auto& ref : renderCommandCtx.renderCommandList->GetRenderCommands()) {
954 PLUGIN_ASSERT(ref.rc);
955 if (commandListValid_) {
956 #if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
957 glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)COMMAND_NAMES[(uint32_t)ref.type]);
958 #endif
959 (this->*(COMMAND_HANDLERS[static_cast<uint32_t>(ref.type)]))(ref);
960 #if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
961 glPopDebugGroup();
962 #endif
963 }
964 }
965 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
966 glPopDebugGroup();
967 #endif
968 #if (RENDER_PERF_ENABLED == 1)
969 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
970 if (validGpuQueries_) {
971 glEndQuery(GL_TIME_ELAPSED_EXT);
972 }
973 #endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
974 perfDataSet.cpuTimer.End();
975 CopyPerfTimeStamp(debugName, perfDataSet);
976 #endif // RENDER_PERF_ENABLED
977 }
978
979 void RenderBackendGLES::RenderCommandBindPipeline(const RenderCommandWithType& ref)
980 {
981 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_PIPELINE);
982 const auto& renderCmd = *static_cast<const struct RenderCommandBindPipeline*>(ref.rc);
983 boundProgram_ = {};
984 if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE) {
985 PLUGIN_ASSERT(currentFrameBuffer_ == nullptr);
986 BindComputePipeline(renderCmd);
987 } else if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
988 BindGraphicsPipeline(renderCmd);
989 }
990 descriptorUpdate_ = true;
991 currentPsoHandle_ = renderCmd.psoHandle;
992 }
993
994 void RenderBackendGLES::BindComputePipeline(const struct RenderCommandBindPipeline& renderCmd)
995 {
996 const auto* pso = static_cast<const ComputePipelineStateObjectGLES*>(
997 managers_.psoMgr->GetComputePso(renderCmd.psoHandle, nullptr));
998 boundComputePipeline_ = pso;
999 boundGraphicsPipeline_ = nullptr;
1000 boundComputeProgram_ = nullptr;
1001 boundShaderProgram_ = nullptr;
1002 if (!boundComputePipeline_) {
1003 return;
1004 }
1005
1006 // Push constants and the "flipLocation" uniform (i.e. uniform state) should only be updated when changed...
1007 const auto& pipelineData =
1008 static_cast<const PipelineStateObjectPlatformDataGL&>(boundComputePipeline_->GetPlatformData());
1009 if (!pipelineData.computeShader) {
1010 return;
1011 }
1012 boundComputeProgram_ = pipelineData.computeShader;
1013 const auto& sd = static_cast<const GpuComputeProgramPlatformDataGL&>(pipelineData.computeShader->GetPlatformData());
1014 const uint32_t program = sd.program;
1015 #if (RENDER_PERF_ENABLED == 1)
1016 if (device_.BoundProgram() != program) {
1017 ++perfCounters_.bindProgram;
1018 }
1019 #endif
1020 device_.UseProgram(program);
1021
1022 if (sd.flipLocation != Gles::INVALID_LOCATION) {
1023 const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
1024 glProgramUniform1fv(program, sd.flipLocation, 1, &flip);
1025 }
1026 }
1027
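// Binds a graphics PSO: applies the cached graphics state, the default viewport/scissor when they
// are not dynamic, then the program and its VAO. The flip uniform (flipLocation) is set to -1.0
// when rendering into the default FBO and 1.0 otherwise; the shaders are expected to use it to
// compensate for the inverted Y of the window surface.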
1028 void RenderBackendGLES::BindGraphicsPipeline(const struct RenderCommandBindPipeline& renderCmd)
1029 {
1030 const auto* pso = static_cast<const GraphicsPipelineStateObjectGLES*>(
1031 managers_.psoMgr->GetGraphicsPso(renderCmd.psoHandle, activeRenderPass_.renderPassDesc,
1032 activeRenderPass_.subpasses, activeRenderPass_.subpassStartIndex, 0, nullptr, nullptr));
1033 boundComputePipeline_ = nullptr;
1034 boundGraphicsPipeline_ = pso;
1035 boundComputeProgram_ = nullptr;
1036 boundShaderProgram_ = nullptr;
1037 if (!boundGraphicsPipeline_ || !currentFrameBuffer_) {
1038 return;
1039 }
1040
1041 const auto& pipelineData = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
1042 dynamicStateFlags_ = pipelineData.dynamicStateFlags;
1043 DoGraphicsState(pipelineData.graphicsState);
1044 // NOTE: Deprecate (default viewport/scissor should be set from default targets at some point)
1045 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT)) {
1046 SetViewport(ViewportDesc { 0.0f, 0.0f, static_cast<float>(renderArea_.extentWidth),
1047 static_cast<float>(renderArea_.extentHeight), 0.0f, 1.0f });
1048 }
1049 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_SCISSOR)) {
1050 SetScissor(ScissorDesc { 0, 0, renderArea_.extentWidth, renderArea_.extentHeight });
1051 }
1052 const GpuShaderProgramGLES* shader = pipelineData.graphicsShader;
1053 if (!shader) {
1054 return;
1055 }
1056 boundShaderProgram_ = shader;
1057 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
1058 // Push constants and the "flipLocation" uniform (i.e. uniform state) should only be updated when changed...
1059 if (!scissorEnabled_) {
1060 scissorEnabled_ = true;
1061 glEnable(GL_SCISSOR_TEST); // Always enabled
1062 }
1063 uint32_t program = sd.program;
1064 #if (RENDER_PERF_ENABLED == 1)
1065 if (device_.BoundProgram() != program) {
1066 ++perfCounters_.bindProgram;
1067 }
1068 #endif
1069 device_.UseProgram(program);
1070 device_.BindVertexArray(pipelineData.vao);
1071 vertexBufferUpdate_ = true;
1072 indexBufferUpdate_ = true;
1073
1074 if (sd.flipLocation != Gles::INVALID_LOCATION) {
1075 const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
1076 glProgramUniform1fv(program, sd.flipLocation, 1, &flip);
1077 }
1078 }
1079
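// Issues the recorded draw. For indexed draws the byte offset into the bound index buffer is
// firstIndex * sizeof(index type), e.g. firstIndex = 6 with CORE_INDEX_TYPE_UINT16 starts 12 bytes
// into the buffer; instanced and base-vertex variants are chosen from instanceCount/vertexOffset.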
1080 void RenderBackendGLES::RenderCommandDraw(const RenderCommandWithType& ref)
1081 {
1082 PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW);
1083 const auto& renderCmd = *static_cast<struct RenderCommandDraw*>(ref.rc);
1084 if (!boundGraphicsPipeline_) {
1085 return;
1086 }
1087 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1088 BindResources();
1089 if (vertexBufferUpdate_ && boundShaderProgram_) {
1090 vertexBufferUpdate_ = false;
1091 const auto& pipelineData =
1092 static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
1093 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(boundShaderProgram_->GetPlatformData());
1094 BindVertexInputs(pipelineData.vertexInputDeclaration, array_view<const int32_t>(sd.inputs, countof(sd.inputs)));
1095 }
1096 if (indexBufferUpdate_) {
1097 indexBufferUpdate_ = false;
1098 device_.BindElementBuffer(boundIndexBuffer_.id);
1099 }
1100 const auto type = GetPrimFromTopology(topology_);
1101 const auto instanceCount = static_cast<GLsizei>(renderCmd.instanceCount);
1102 // firstInstance is not supported yet, need to set the SPIRV_Cross generated uniform
1103 // "SPIRV_Cross_BaseInstance" to renderCmd.firstInstance;
1104 if (renderCmd.indexCount) {
1105 uintptr_t offsetp = boundIndexBuffer_.offset;
1106 GLenum indexType = GL_UNSIGNED_SHORT;
1107 switch (boundIndexBuffer_.type) {
1108 case CORE_INDEX_TYPE_UINT16:
1109 offsetp += renderCmd.firstIndex * sizeof(uint16_t);
1110 indexType = GL_UNSIGNED_SHORT;
1111 break;
1112 case CORE_INDEX_TYPE_UINT32:
1113 offsetp += renderCmd.firstIndex * sizeof(uint32_t);
1114 indexType = GL_UNSIGNED_INT;
1115 break;
1116 default:
1117 PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1118 break;
1119 }
1120 const auto indexCount = static_cast<const GLsizei>(renderCmd.indexCount);
1121 const auto vertexOffset = static_cast<const GLsizei>(renderCmd.vertexOffset);
1122 const void* offset = reinterpret_cast<const void*>(offsetp);
1123 if (renderCmd.instanceCount > 1) {
1124 if (vertexOffset) {
1125 glDrawElementsInstancedBaseVertex(type, indexCount, indexType, offset, instanceCount, vertexOffset);
1126 } else {
1127 glDrawElementsInstanced(type, indexCount, indexType, offset, instanceCount);
1128 }
1129 } else {
1130 if (vertexOffset) {
1131 glDrawElementsBaseVertex(type, indexCount, indexType, offset, vertexOffset);
1132 } else {
1133 glDrawElements(type, indexCount, indexType, offset);
1134 }
1135 }
1136 #if (RENDER_PERF_ENABLED == 1)
1137 ++perfCounters_.drawCount;
1138 perfCounters_.instanceCount += renderCmd.instanceCount;
1139 perfCounters_.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
1140 #endif
1141 } else {
1142 const auto firstVertex = static_cast<const GLsizei>(renderCmd.firstVertex);
1143 const auto vertexCount = static_cast<const GLsizei>(renderCmd.vertexCount);
1144 if (renderCmd.instanceCount > 1) {
1145 glDrawArraysInstanced(type, firstVertex, vertexCount, instanceCount);
1146 } else {
1147 glDrawArrays(type, firstVertex, vertexCount);
1148 }
1149 #if (RENDER_PERF_ENABLED == 1)
1150 ++perfCounters_.drawCount;
1151 perfCounters_.instanceCount += renderCmd.instanceCount;
1152 perfCounters_.triangleCount += (renderCmd.vertexCount * 3) * renderCmd.instanceCount; // 3: vertex dimension
1153 #endif
1154 }
1155 }
1156
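// Indirect draws: glDrawElementsIndirect/glDrawArraysIndirect consume a single record each, so a
// drawCount > 1 is emulated by looping and advancing the indirect-buffer offset by 'stride'
// between calls.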
1157 void RenderBackendGLES::RenderCommandDrawIndirect(const RenderCommandWithType& ref)
1158 {
1159 PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW_INDIRECT);
1160 const auto& renderCmd = *static_cast<const struct RenderCommandDrawIndirect*>(ref.rc);
1161 if (!boundGraphicsPipeline_) {
1162 return;
1163 }
1164 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1165 if (vertexBufferUpdate_ && boundShaderProgram_) {
1166 vertexBufferUpdate_ = false;
1167 const auto& pipelineData =
1168 static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
1169 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(boundShaderProgram_->GetPlatformData());
1170 BindVertexInputs(pipelineData.vertexInputDeclaration, array_view<const int32_t>(sd.inputs, countof(sd.inputs)));
1171 }
1172 if (indexBufferUpdate_) {
1173 indexBufferUpdate_ = false;
1174 device_.BindElementBuffer(boundIndexBuffer_.id);
1175 }
1176 BindResources();
1177 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1178 const auto& plat = gpuBuffer->GetPlatformData();
1179 device_.BindBuffer(GL_DRAW_INDIRECT_BUFFER, plat.buffer);
1180 const auto type = GetPrimFromTopology(topology_);
1181 auto offset = static_cast<GLintptr>(renderCmd.offset);
1182 if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1183 GLenum indexType = GL_UNSIGNED_SHORT;
1184 switch (boundIndexBuffer_.type) {
1185 case CORE_INDEX_TYPE_UINT16:
1186 indexType = GL_UNSIGNED_SHORT;
1187 break;
1188 case CORE_INDEX_TYPE_UINT32:
1189 indexType = GL_UNSIGNED_INT;
1190 break;
1191 default:
1192 PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1193 break;
1194 }
1195 for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1196 glDrawElementsIndirect(type, indexType, reinterpret_cast<const void*>(offset));
1197 offset += renderCmd.stride;
1198 }
1199 } else {
1200 for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1201 glDrawArraysIndirect(type, reinterpret_cast<const void*>(offset));
1202 offset += renderCmd.stride;
1203 }
1204 }
1205 #if (RENDER_PERF_ENABLED == 1)
1206 perfCounters_.drawIndirectCount += renderCmd.drawCount;
1207 #endif
1208 }
1209 }
1210
1211 void RenderBackendGLES::RenderCommandDispatch(const RenderCommandWithType& ref)
1212 {
1213 PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH);
1214 const auto& renderCmd = *static_cast<const struct RenderCommandDispatch*>(ref.rc);
1215 if (!boundComputePipeline_) {
1216 return;
1217 }
1218 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1219 BindResources();
1220 glDispatchCompute(renderCmd.groupCountX, renderCmd.groupCountY, renderCmd.groupCountZ);
1221 #if (RENDER_PERF_ENABLED == 1)
1222 ++perfCounters_.dispatchCount;
1223 #endif
1224 }
1225
1226 void RenderBackendGLES::RenderCommandDispatchIndirect(const RenderCommandWithType& ref)
1227 {
1228 PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH_INDIRECT);
1229 const auto& renderCmd = *static_cast<const struct RenderCommandDispatchIndirect*>(ref.rc);
1230 if (!boundComputePipeline_) {
1231 return;
1232 }
1233 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1234 BindResources();
1235 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1236 const auto& plat = gpuBuffer->GetPlatformData();
1237 device_.BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, plat.buffer);
1238 glDispatchComputeIndirect(static_cast<GLintptr>(renderCmd.offset));
1239 #if (RENDER_PERF_ENABLED == 1)
1240 ++perfCounters_.dispatchIndirectCount;
1241 #endif
1242 }
1243 }
1244
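// ClearScissorInit/Set/Reset implement lazy scissor handling around attachment clears: Init
// records the render area, Set applies it only right before a clear that needs it, and Reset
// restores (or primes) the cached scissor box afterwards.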
1245 void RenderBackendGLES::ClearScissorInit(const RenderPassDesc::RenderArea& aArea)
1246 {
1247 resetScissor_ = false; // need to reset scissor state after clear?
1248 clearScissorSet_ = true; // need to setup clear scissors before clear?
1249 clearScissor_ = aArea; // area to be cleared
1250 if (scissorPrimed_) { // have scissors been set yet?
1251 if ((clearScissor_.offsetX == scissorBox_.offsetX) && (clearScissor_.offsetY == scissorBox_.offsetY) &&
1252 (clearScissor_.extentWidth == scissorBox_.extentWidth) &&
1253 (clearScissor_.extentHeight == scissorBox_.extentHeight)) {
1254 // Current scissors match clearscissor area, so no need to set it again.
1255 clearScissorSet_ = false;
1256 }
1257 }
1258 }
1259
1260 void RenderBackendGLES::ClearScissorSet()
1261 {
1262 if (clearScissorSet_) { // do we need to set clear scissors.
1263 clearScissorSet_ = false; // clear scissors have been set now.
1264 resetScissor_ = true; // we are modifying scissors, so remember to reset them afterwards.
1265 glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
1266 static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
1267 }
1268 }
1269
1270 void RenderBackendGLES::ClearScissorReset()
1271 {
1272 if (resetScissor_) { // need to reset correct scissors?
1273 if (!scissorPrimed_) {
1274 // scissors have not been set yet, so use clearbox as current cache state (and don't change scissor
1275 // setting)
1276 scissorPrimed_ = true;
1277 glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
1278 static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
1279 scissorBox_.offsetX = clearScissor_.offsetX;
1280 scissorBox_.offsetY = clearScissor_.offsetY;
1281 scissorBox_.extentHeight = clearScissor_.extentHeight;
1282 scissorBox_.extentWidth = clearScissor_.extentWidth;
1283 } else {
1284 // Restore scissor box to cached state. (update scissors when needed, since clearBox != scissorBox)
1285 glScissor(static_cast<GLint>(scissorBox_.offsetX), static_cast<GLint>(scissorBox_.offsetY),
1286 static_cast<GLsizei>(scissorBox_.extentWidth), static_cast<GLsizei>(scissorBox_.extentHeight));
1287 }
1288 }
1289 }
1290
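// Clears color attachments whose load op is CORE_ATTACHMENT_LOAD_OP_CLEAR. If the cached color
// write mask is not full RGBA, it is temporarily forced to full before glClearBufferfv and
// restored afterwards, because the write mask also affects buffer clears.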
1291 void RenderBackendGLES::HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc*> colorAttachments)
1292 {
1293 constexpr ColorComponentFlags clearAll = CORE_COLOR_COMPONENT_R_BIT | CORE_COLOR_COMPONENT_G_BIT |
1294 CORE_COLOR_COMPONENT_B_BIT | CORE_COLOR_COMPONENT_A_BIT;
1295 const auto& cBlend = cacheState_.colorBlendState;
1296 for (uint32_t idx = 0; idx < colorAttachments.size(); ++idx) {
1297 if (colorAttachments[idx] == nullptr) {
1298 continue;
1299 }
1300 const auto& ref = *(colorAttachments[idx]);
1301 if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1302 const auto& cBlendState = cBlend.colorAttachments[idx];
1303 if (clearAll != cBlendState.colorWriteMask) {
1304 glColorMaski(idx, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1305 }
1306 ClearScissorSet();
1307 // glClearBufferfv only for float formats?
1308 // glClearBufferiv & glClearbufferuv only for integer formats?
1309 glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
1310 if (clearAll != cBlendState.colorWriteMask) {
1311 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1312 glColorMaski(idx, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
1313 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
1314 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
1315 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
1316 }
1317 }
1318 }
1319 }
1320
1321 void RenderBackendGLES::HandleDepthAttachment(const RenderPassDesc::AttachmentDesc& depthAttachment)
1322 {
1323 const GLuint allBits = 0xFFFFFFFFu;
1324 const auto& ref = depthAttachment;
1325 const bool clearDepth = (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1326 const bool clearStencil = (ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1327 // Change state if needed.
1328 if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1329 glDepthMask(GL_TRUE);
1330 }
1331 if (clearStencil) {
1332 if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1333 glStencilMaskSeparate(GL_FRONT, allBits);
1334 }
1335 if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1336 glStencilMaskSeparate(GL_BACK, allBits);
1337 }
1338 }
1339 if (clearDepth || clearStencil) {
1340 // Set the scissors for clear..
1341 ClearScissorSet();
1342 }
1343 // Do clears.
1344 if (clearDepth && clearStencil) {
1345 glClearBufferfi(GL_DEPTH_STENCIL, 0, ref.clearValue.depthStencil.depth,
1346 static_cast<GLint>(ref.clearValue.depthStencil.stencil));
1347 } else if (clearDepth) {
1348 glClearBufferfv(GL_DEPTH, 0, &ref.clearValue.depthStencil.depth);
1349 } else if (clearStencil) {
1350 glClearBufferiv(GL_STENCIL, 0, reinterpret_cast<const GLint*>(&ref.clearValue.depthStencil.stencil));
1351 }
1352
1353 // Restore cached state, if we touched the state.
1354 if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1355 // NOTE: We might not need to restore here. (we would need to peek into the command list to find out...)
1356 glDepthMask(GL_FALSE);
1357 }
1358 if (clearStencil) {
1359 // NOTE: We might not need to restore here. (we would need to peek into the command list to find out...)
1360 if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1361 glStencilMaskSeparate(GL_FRONT, cacheState_.depthStencilState.frontStencilOpState.writeMask);
1362 }
1363 if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1364 glStencilMaskSeparate(GL_BACK, cacheState_.depthStencilState.backStencilOpState.writeMask);
1365 }
1366 }
1367 }
1368
1369 void RenderBackendGLES::DoSubPass(uint32_t subPass)
1370 {
1371 if (currentFrameBuffer_ == nullptr) {
1372 // Completely invalid state in backend.
1373 return;
1374 }
1375 const auto& rpd = activeRenderPass_.renderPassDesc;
1376 const auto& sb = activeRenderPass_.subpasses[subPass];
1377
1378 // If there's no FBO, activate with the swapchain handle so that drawing happens on the correct surface.
1379 if (!currentFrameBuffer_->fbos[subPass].fbo && (sb.colorAttachmentCount == 1U)) {
1380 auto color = rpd.attachmentHandles[sb.colorAttachmentIndices[0]];
1381 device_.Activate(color);
1382 }
1383 device_.BindFrameBuffer(currentFrameBuffer_->fbos[subPass].fbo);
1384 ClearScissorInit(renderArea_);
1385 if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1386 SetState(GL_RASTERIZER_DISCARD, GL_FALSE);
1387 }
1388 {
1389 // NOTE: clear is not yet optimal. depth, stencil and color should be cleared using ONE glClear call if
1390 // possible. (ie. all buffers at once)
1391 renderingToDefaultFbo_ = false;
1392 if (sb.colorAttachmentCount > 0) {
1393 // collect color attachment infos..
1394 const RenderPassDesc::AttachmentDesc*
1395 colorAttachments[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1396 for (uint32_t ci = 0; ci < sb.colorAttachmentCount; ci++) {
1397 uint32_t index = sb.colorAttachmentIndices[ci];
1398 if (resolveToBackbuffer_[index]) {
1399 // NOTE: this could fail with multiple color attachments....
1400 renderingToDefaultFbo_ = true;
1401 }
1402 if (!attachmentCleared_[index]) {
1403 attachmentCleared_[index] = true;
1404 colorAttachments[ci] = &rpd.attachments[index];
1405 } else {
1406 colorAttachments[ci] = nullptr;
1407 }
1408 }
1409 HandleColorAttachments(array_view(colorAttachments, sb.colorAttachmentCount));
1410 }
1411 if (sb.depthAttachmentCount) {
1412 if (!attachmentCleared_[sb.depthAttachmentIndex]) {
1413 attachmentCleared_[sb.depthAttachmentIndex] = true;
1414 HandleDepthAttachment(rpd.attachments[sb.depthAttachmentIndex]);
1415 }
1416 }
1417 }
1418 if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1419 // NOTE: We might not need to restore here. (we would need to peek into the command list to find out...)
1420 SetState(GL_RASTERIZER_DISCARD, GL_TRUE);
1421 }
1422 ClearScissorReset();
1423
1424 if (viewportPending_) {
1425 viewportPending_ = false;
1426 // Handle top-left / bottom-left origin conversion
1427 auto y = static_cast<GLint>(viewport_.y);
1428 const auto h = static_cast<GLsizei>(viewport_.height);
1429 if (renderingToDefaultFbo_) {
1430 const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
1431 y = fh - (y + h);
1432 }
1433 glViewport(static_cast<GLint>(viewport_.x), y, static_cast<GLsizei>(viewport_.width), h);
1434 }
1435 }
1436
1437 void RenderBackendGLES::ScanPasses(const RenderPassDesc& rpd)
1438 {
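// Record the first and last subpass that uses each attachment and flag attachments that resolve to the default FBO.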
1439 for (uint32_t sub = 0; sub < rpd.subpassCount; sub++) {
1440 const auto& currentSubPass = activeRenderPass_.subpasses[sub];
1441 for (uint32_t ci = 0; ci < currentSubPass.resolveAttachmentCount; ci++) {
1442 const uint32_t resolveTo = currentSubPass.resolveAttachmentIndices[ci];
1443 if (!attachmentImage_[resolveTo]) {
1444 PLUGIN_LOG_ONCE_E(to_string(resolveTo), "Missing attachment %u", resolveTo);
1445 continue;
1446 }
1447 if (attachmentFirstUse_[resolveTo] == 0xFFFFFFFF) {
1448 attachmentFirstUse_[resolveTo] = sub;
1449 }
1450 attachmentLastUse_[resolveTo] = sub;
1451 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[resolveTo]->GetPlatformData());
1452 if ((p.image == 0) && (p.renderBuffer == 0)) {
1453 // mark the "resolveFrom" (ie. the color attachment) as "backbuffer-like", since we resolve to
1454 // backbuffer...
1455 uint32_t resolveFrom = currentSubPass.colorAttachmentIndices[ci];
1456 resolveToBackbuffer_[resolveFrom] = true;
1457 }
1458 }
1459 for (uint32_t ci = 0; ci < currentSubPass.inputAttachmentCount; ci++) {
1460 uint32_t index = currentSubPass.inputAttachmentIndices[ci];
1461 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1462 attachmentFirstUse_[index] = sub;
1463 }
1464 attachmentLastUse_[index] = sub;
1465 }
1466 for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1467 uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1468 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1469 attachmentFirstUse_[index] = sub;
1470 }
1471 attachmentLastUse_[index] = sub;
1472 if (attachmentImage_[index]) {
1473 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[index]->GetPlatformData());
1474 if ((p.image == 0) && (p.renderBuffer == 0)) {
1475 resolveToBackbuffer_[index] = true;
1476 }
1477 }
1478 }
1479 if (currentSubPass.depthAttachmentCount > 0) {
1480 uint32_t index = currentSubPass.depthAttachmentIndex;
1481 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1482 attachmentFirstUse_[index] = sub;
1483 }
1484 attachmentLastUse_[index] = sub;
1485 }
1486 }
1487 }
1488
1489 void RenderBackendGLES::RenderCommandBeginRenderPass(const RenderCommandWithType& ref)
1490 {
1491 PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
1492 const auto& renderCmd = *static_cast<const struct RenderCommandBeginRenderPass*>(ref.rc);
1493 switch (renderCmd.beginType) {
1494 case RenderPassBeginType::RENDER_PASS_BEGIN: {
1495 ++inRenderpass_;
1496 PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES beginrenderpass mInRenderpass %u", inRenderpass_);
1497 activeRenderPass_ = renderCmd; // Store this because we need it later (in NextRenderPass)
1498
1499 const auto& rpd = activeRenderPass_.renderPassDesc;
1500 renderArea_ = rpd.renderArea; // can subpasses have different render areas?
1501 auto& cpm = *(static_cast<NodeContextPoolManagerGLES*>(managers_.poolMgr));
1502 if (multisampledRenderToTexture_) {
1503 cpm.FilterRenderPass(activeRenderPass_);
1504 }
1505 currentFrameBuffer_ = cpm.GetFramebuffer(cpm.GetFramebufferHandle(activeRenderPass_));
1506 if (!currentFrameBuffer_) {
1507 // Completely invalid state in backend.
1508 commandListValid_ = false;
1509 --inRenderpass_;
1510 return;
1511 }
1512 PLUGIN_ASSERT_MSG(
1513 activeRenderPass_.subpassStartIndex == 0, "activeRenderPass_.subpassStartIndex != 0 not handled!");
1514 currentSubPass_ = 0;
1515 // find first and last use, clear clear-flags. (this could be cached in the low-level classes)
1516 for (uint32_t i = 0; i < rpd.attachmentCount; i++) {
1517 attachmentCleared_[i] = false;
1518 attachmentFirstUse_[i] = 0xFFFFFFFF;
1519 attachmentLastUse_[i] = 0;
1520 resolveToBackbuffer_[i] = false;
1521 attachmentImage_[i] =
1522 static_cast<const GpuImageGLES*>(gpuResourceMgr_.GetImage(rpd.attachmentHandles[i]));
1523 }
1524 ScanPasses(rpd);
1525 DoSubPass(0);
1526 #if (RENDER_PERF_ENABLED == 1)
1527 ++perfCounters_.renderPassCount;
1528 #endif
1529 } break;
1530
1531 case RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN: {
1532 currentSubPass_ = renderCmd.subpassStartIndex;
1533 PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1534 if (!currentFrameBuffer_) {
1535 // Completely invalid state in backend.
1536 commandListValid_ = false;
1537 return;
1538 }
1539 DoSubPass(activeRenderPass_.subpassStartIndex);
1540 } break;
1541
1542 default:
1543 break;
1544 }
1545 }
1546
1547 void RenderBackendGLES::RenderCommandNextSubpass(const RenderCommandWithType& ref)
1548 {
1549 PLUGIN_ASSERT(ref.type == RenderCommandType::NEXT_SUBPASS);
1550 const auto& renderCmd = *static_cast<const struct RenderCommandNextSubpass*>(ref.rc);
1551 PLUGIN_UNUSED(renderCmd);
1552 PLUGIN_ASSERT(renderCmd.subpassContents == SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
1553 ++currentSubPass_;
1554 PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1555 DoSubPass(currentSubPass_);
1556 }
1557
1558 int32_t RenderBackendGLES::InvalidateDepthStencil(
1559 array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1560 {
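// Writes at most one entry (depth, stencil or combined) into invalidateAttachment and returns the entry count.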
1561 int32_t depthCount = 0;
1562 if (currentSubPass.depthAttachmentCount == 0) {
1563 return depthCount; // early out
1564 }
1565 const uint32_t index = currentSubPass.depthAttachmentIndex;
1566 if (attachmentLastUse_[index] != currentSubPass_) {
1567 return depthCount; // early out
1568 }
1569 // is last use of the attachment
1570 const auto& image = attachmentImage_[index];
1571 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1572 // NOTE: we expect the depth to be in the FBO in this case even if there is a depth target in the render pass
1573 if ((dplat.image || dplat.renderBuffer) && (!renderingToDefaultFbo_)) {
1574 bool depth = false;
1575 bool stencil = false;
1576 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1577 if ((dplat.format == GL_DEPTH_COMPONENT) || (dplat.format == GL_DEPTH_STENCIL)) {
1578 depth = true;
1579 }
1580 }
1581 if (rpd.attachments[index].stencilStoreOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1582 if ((dplat.format == GL_STENCIL) || (dplat.format == GL_DEPTH_STENCIL)) {
1583 stencil = true;
1584 }
1585 }
1586 if (depth && stencil) {
1587 invalidateAttachment[0] = GL_DEPTH_STENCIL_ATTACHMENT;
1588 depthCount++;
1589 } else if (stencil) {
1590 invalidateAttachment[0] = GL_STENCIL_ATTACHMENT;
1591 depthCount++;
1592 } else if (depth) {
1593 invalidateAttachment[0] = GL_DEPTH_ATTACHMENT;
1594 depthCount++;
1595 }
1596 }
1597 return depthCount;
1598 }
1599
1600 int32_t RenderBackendGLES::InvalidateColor(
1601 array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1602 {
1603 int32_t colorCount = 0;
1604 // see which parts of the fbo can be invalidated...
1605 // collect color attachment infos..
1606 for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1607 const uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1608 if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1609 if (const auto* image = attachmentImage_[index]) {
1610 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1611 if (dplat.image || dplat.renderBuffer) {
1612 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1613 invalidateAttachment[static_cast<size_t>(colorCount)] = GL_COLOR_ATTACHMENT0 + ci;
1614 colorCount++;
1615 }
1616 }
1617 }
1618 }
1619 }
1620 return colorCount;
1621 }
1622
1623 uint32_t RenderBackendGLES::ResolveMSAA(const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1624 {
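// Returns the framebuffer target the caller should invalidate: GL_FRAMEBUFFER when no resolve was done, GL_READ_FRAMEBUFFER after a resolve blit.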
1625 const GLbitfield mask = ((currentSubPass.resolveAttachmentCount > 0u) ? GL_COLOR_BUFFER_BIT : 0u) |
1626 ((currentSubPass.depthResolveAttachmentCount > 0u) ? GL_DEPTH_BUFFER_BIT : 0u);
1627 if (!mask) {
1628 return GL_FRAMEBUFFER;
1629 }
1630
1631 if (scissorEnabled_) {
1632 glDisable(GL_SCISSOR_TEST);
1633 scissorEnabled_ = false;
1634 }
1635
1636 // Resolve MSAA buffers.
1637 // NOTE: ARM recommends NOT to use glBlitFramebuffer here
1638 if (!currentSubPass.viewMask) {
1639 device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1640 device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1641
1642 glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1643 static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1644 static_cast<GLint>(currentFrameBuffer_->height), mask, GL_NEAREST);
1645 } else {
1646 // Layers need to be resolved one by one. Create temporary FBOs and go through the layers.
1647 GLuint frameBuffers[2U]; // 2: buffer size
1648 glGenFramebuffers(2, frameBuffers); // 2: buffer size
1649 device_.BindReadFrameBuffer(frameBuffers[0U]);
1650 device_.BindWriteFrameBuffer(frameBuffers[1U]);
1651
1652 const auto& srcImage =
1653 gpuResourceMgr_.GetImage(rpd.attachmentHandles[currentSubPass.colorAttachmentIndices[0U]]);
1654 if (srcImage == nullptr) {
1655 return GL_FRAMEBUFFER;
1656 }
1657 const auto& srcPlat = static_cast<const GpuImagePlatformDataGL&>(srcImage->GetBasePlatformData());
1658 const auto& dstImage =
1659 gpuResourceMgr_.GetImage(rpd.attachmentHandles[currentSubPass.resolveAttachmentIndices[0U]]);
1660 if (dstImage == nullptr) {
1661 return GL_FRAMEBUFFER;
1662 }
1663 const auto& dstPlat = static_cast<const GpuImagePlatformDataGL&>(dstImage->GetBasePlatformData());
1664 auto viewMask = currentSubPass.viewMask;
1665 auto layer = 0;
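// Each set bit in viewMask corresponds to one array layer; resolve the layers one by one.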
1666 while (viewMask) {
1667 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcPlat.image, 0, layer);
1668 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstPlat.image, 0, layer);
1669
1670 glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1671 static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1672 static_cast<GLint>(currentFrameBuffer_->height), mask, GL_NEAREST);
1673 viewMask >>= 1U;
1674 ++layer;
1675 }
1676 glDeleteFramebuffers(2, frameBuffers); // 2: buffer size
1677
1678 // invalidation expects to find the actual FBOs
1679 device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1680 device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1681 }
1682 return GL_READ_FRAMEBUFFER;
1683 }
1684
1685 void RenderBackendGLES::RenderCommandEndRenderPass(const RenderCommandWithType& ref)
1686 {
1687 PLUGIN_ASSERT(ref.type == RenderCommandType::END_RENDER_PASS);
1688 const auto& renderCmd = *static_cast<const struct RenderCommandEndRenderPass*>(ref.rc);
1689 if (renderCmd.endType == RenderPassEndType::END_RENDER_PASS) {
1690 PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES endrenderpass mInRenderpass %u", inRenderpass_);
1691 inRenderpass_--;
1692 }
1693 if (currentFrameBuffer_ == nullptr) {
1694 // Completely invalid state in backend.
1695 return;
1696 }
1697 const auto& rpd = activeRenderPass_.renderPassDesc;
1698 const auto& currentSubPass = activeRenderPass_.subpasses[currentSubPass_];
1699
1700 // Resolve MSAA
1701 const uint32_t fbType = ResolveMSAA(rpd, currentSubPass);
1702
1703 // Finally invalidate color and depth..
1704 GLenum invalidate[PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT + 1] = {};
1705 int32_t invalidateCount = InvalidateColor(invalidate, rpd, currentSubPass);
1706 invalidateCount += InvalidateDepthStencil(
1707 array_view(invalidate + invalidateCount, countof(invalidate) - invalidateCount), rpd, currentSubPass);
1708
1709 // NOTE: all attachments should be the same size AND currentFrameBuffer_->width/height should match that!
1710 Invalidate(fbType, invalidateCount, invalidate, rpd, *currentFrameBuffer_);
1711
1712 if (inRenderpass_ == 0) {
1713 currentFrameBuffer_ = nullptr;
1714 }
1715 }
1716
1717 void RenderBackendGLES::RenderCommandBindVertexBuffers(const RenderCommandWithType& ref)
1718 {
1719 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_VERTEX_BUFFERS);
1720 const auto& renderCmd = *static_cast<const struct RenderCommandBindVertexBuffers*>(ref.rc);
1721 PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1722 PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1723 if (!boundGraphicsPipeline_ || !boundShaderProgram_) {
1724 return;
1725 }
1726 vertexAttribBinds_ = renderCmd.vertexBufferCount;
1727 for (size_t i = 0; i < renderCmd.vertexBufferCount; i++) {
1728 const auto& currVb = renderCmd.vertexBuffers[i];
1729 if (const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(currVb.bufferHandle); gpuBuffer) {
1730 const auto& plat = gpuBuffer->GetPlatformData();
1731 uintptr_t offset = currVb.bufferOffset;
1732 offset += plat.currentByteOffset;
1733 vertexAttribBindSlots_[i].id = plat.buffer;
1734 vertexAttribBindSlots_[i].offset = static_cast<intptr_t>(offset);
1735 } else {
1736 vertexAttribBindSlots_[i].id = 0;
1737 vertexAttribBindSlots_[i].offset = 0;
1738 }
1739 }
1740 vertexBufferUpdate_ = true;
1741 }
1742
1743 void RenderBackendGLES::RenderCommandBindIndexBuffer(const RenderCommandWithType& ref)
1744 {
1745 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_INDEX_BUFFER);
1746 const auto& renderCmd = *static_cast<const struct RenderCommandBindIndexBuffer*>(ref.rc);
1747 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.indexBuffer.bufferHandle);
1748 gpuBuffer) {
1749 const auto& plat = gpuBuffer->GetPlatformData();
1750 boundIndexBuffer_.offset = renderCmd.indexBuffer.bufferOffset;
1751 boundIndexBuffer_.offset += plat.currentByteOffset;
1752 boundIndexBuffer_.type = renderCmd.indexBuffer.indexType;
1753 boundIndexBuffer_.id = plat.buffer;
1754 }
1755 indexBufferUpdate_ = true;
1756 }
1757
1758 void RenderBackendGLES::RenderCommandBlitImage(const RenderCommandWithType& ref)
1759 {
1760 PLUGIN_ASSERT(ref.type == RenderCommandType::BLIT_IMAGE);
1761 const auto& renderCmd = *static_cast<const struct RenderCommandBlitImage*>(ref.rc);
1762 const auto* srcImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1763 const auto* dstImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1764 if ((srcImage == nullptr) || (dstImage == nullptr)) {
1765 return;
1766 }
1767 const auto& srcDesc = srcImage->GetDesc();
1768 const auto& srcPlat = srcImage->GetPlatformData();
1769 const auto& dstDesc = dstImage->GetDesc();
1770 const auto& dstPlat = dstImage->GetPlatformData();
1771 const auto& srcRect = renderCmd.imageBlit.srcOffsets;
1772 const auto& dstRect = renderCmd.imageBlit.dstOffsets;
1773 const auto& src = renderCmd.imageBlit.srcSubresource;
1774 const auto& dst = renderCmd.imageBlit.dstSubresource;
1775 const auto srcMipLevel = static_cast<GLint>(src.mipLevel);
1776 const auto dstMipLevel = static_cast<GLint>(dst.mipLevel);
1777 const auto srcSampleCount = static_cast<uint32_t>(srcDesc.sampleCountFlags);
1778 const auto dstSampleCount = static_cast<uint32_t>(dstDesc.sampleCountFlags);
1779 PLUGIN_ASSERT_MSG(src.layerCount == dst.layerCount, "Source and Destination layercounts do not match!");
1780 PLUGIN_ASSERT_MSG(inRenderpass_ == 0, "RenderCommandBlitImage while inRenderPass");
1781 glDisable(GL_SCISSOR_TEST);
1782 scissorEnabled_ = false;
1783 // NOTE: LAYERS! (texture arrays)
1784 device_.BindReadFrameBuffer(blitImageSourceFbo_);
1785 device_.BindWriteFrameBuffer(blitImageDestinationFbo_);
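// Attach the source and destination mips to the helper FBOs and blit one layer at a time.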
1786 for (uint32_t layer = 0; layer < src.layerCount; layer++) {
1787 const GLenum srcType = GetTarget(srcPlat.type, layer, srcSampleCount);
1788 const GLenum dstType = GetTarget(dstPlat.type, layer, dstSampleCount);
1789 // glFramebufferTextureLayer for array textures....
1790 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, srcPlat.image, srcMipLevel);
1791 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, dstPlat.image, dstMipLevel);
1792 DoBlit(renderCmd.filter, { src.mipLevel, srcRect[0], srcRect[1], srcDesc.height },
1793 { dst.mipLevel, dstRect[0], dstRect[1], dstDesc.height });
1794 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, 0, 0);
1795 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, 0, 0);
1796 }
1797 }
1798
1799 void RenderBackendGLES::RenderCommandCopyBuffer(const RenderCommandWithType& ref)
1800 {
1801 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER);
1802 const auto& renderCmd = *static_cast<const struct RenderCommandCopyBuffer*>(ref.rc);
1803 const auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1804 const auto* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.dstHandle);
1805 if (srcGpuBuffer && dstGpuBuffer) {
1806 const auto& srcData = srcGpuBuffer->GetPlatformData();
1807 const auto& dstData = dstGpuBuffer->GetPlatformData();
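// Save the current GL_COPY_READ/WRITE_BUFFER bindings so they can be restored after the copy.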
1808 const auto oldBindR = device_.BoundBuffer(GL_COPY_READ_BUFFER);
1809 const auto oldBindW = device_.BoundBuffer(GL_COPY_WRITE_BUFFER);
1810 device_.BindBuffer(GL_COPY_READ_BUFFER, srcData.buffer);
1811 device_.BindBuffer(GL_COPY_WRITE_BUFFER, dstData.buffer);
1812 glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
1813 static_cast<GLintptr>(renderCmd.bufferCopy.srcOffset),
1814 static_cast<GLintptr>(renderCmd.bufferCopy.dstOffset), static_cast<GLsizeiptr>(renderCmd.bufferCopy.size));
1815 device_.BindBuffer(GL_COPY_READ_BUFFER, oldBindR);
1816 device_.BindBuffer(GL_COPY_WRITE_BUFFER, oldBindW);
1817 }
1818 }
1819
1820 void RenderBackendGLES::BufferToImageCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1821 {
1822 #if (RENDER_HAS_GLES_BACKEND == 1) && defined(_WIN32)
1823 // use the workaround only for gles backend on windows. (pvr simulator bug)
1824 constexpr const bool usePixelUnpackBuffer = false;
1825 #else
1826 // expect this to work, and the nvidia bug to be fixed.
1827 constexpr const bool usePixelUnpackBuffer = true;
1828 #endif
1829 auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1830 auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1831 if ((srcGpuBuffer == nullptr) || (dstGpuImage == nullptr)) {
1832 return;
1833 }
1834 const auto info = SetupBlit<usePixelUnpackBuffer>(device_, renderCmd.bufferImageCopy, *srcGpuBuffer, *dstGpuImage);
1835 if (info.iPlat.type == GL_TEXTURE_CUBE_MAP) {
1836 BlitCube(device_, info);
1837 } else if (info.iPlat.type == GL_TEXTURE_2D) {
1838 Blit2D(device_, info);
1839 } else if (info.iPlat.type == GL_TEXTURE_2D_ARRAY) {
1840 BlitArray(device_, info);
1841 } else if (info.iPlat.type == GL_TEXTURE_3D) {
1842 Blit3D(device_, info);
1843 #if RENDER_HAS_GLES_BACKEND
1844 } else if (info.iPlat.type == GL_TEXTURE_EXTERNAL_OES) {
1845 PLUGIN_LOG_E("Tried to copy to GL_TEXTURE_EXTERNAL_OES. Ignored!");
1846 #endif
1847 } else {
1848 PLUGIN_ASSERT_MSG(false, "RenderCommandCopyBufferImage unhandled type");
1849 }
1850 FinishBlit<usePixelUnpackBuffer>(device_, *srcGpuBuffer);
1851 }
1852
1853 void RenderBackendGLES::ImageToBufferCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1854 {
1855 const auto& bc = renderCmd.bufferImageCopy;
1856 const auto* srcGpuImage = static_cast<GpuImageGLES*>(gpuResourceMgr_.GetImage(renderCmd.srcHandle));
1857 const auto* dstGpuBuffer = static_cast<GpuBufferGLES*>(gpuResourceMgr_.GetBuffer(renderCmd.dstHandle));
1858 PLUGIN_ASSERT(srcGpuImage);
1859 PLUGIN_ASSERT(dstGpuBuffer);
1860 if ((srcGpuImage == nullptr) || (dstGpuBuffer == nullptr)) {
1861 return;
1862 }
1863 const auto& iPlat = static_cast<const GpuImagePlatformDataGL&>(srcGpuImage->GetPlatformData());
1864 const auto& bPlat = static_cast<const GpuBufferPlatformDataGL&>(dstGpuBuffer->GetPlatformData());
1865 if ((iPlat.type != GL_TEXTURE_CUBE_MAP) && (iPlat.type != GL_TEXTURE_2D)) {
1866 PLUGIN_LOG_E("Unsupported texture type in ImageToBufferCopy %x", iPlat.type);
1867 return;
1868 }
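// Attach the source mip to the read FBO and read the pixels into the destination buffer through GL_PIXEL_PACK_BUFFER.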
1869 device_.BindReadFrameBuffer(blitImageSourceFbo_);
1870 PLUGIN_ASSERT(bc.imageSubresource.layerCount == 1);
1871 GLenum type = GL_TEXTURE_2D;
1872 if (iPlat.type == GL_TEXTURE_CUBE_MAP) {
1873 type = GetCubeMapTarget(iPlat.type, bc.imageSubresource.baseArrayLayer);
1874 }
1875 // glFramebufferTextureLayer for array textures....
1876 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, static_cast<GLuint>(iPlat.image),
1877 static_cast<GLint>(bc.imageSubresource.mipLevel));
1878 const Math::UVec2 sPos { bc.imageOffset.width, bc.imageOffset.height };
1879 const Math::UVec2 sExt { bc.imageExtent.width, bc.imageExtent.height };
1880 device_.BindBuffer(GL_PIXEL_PACK_BUFFER, bPlat.buffer);
1881 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
1882 glPixelStorei(GL_PACK_ALIGNMENT, 1);
1883 uintptr_t dstOffset = bc.bufferOffset + bPlat.currentByteOffset;
1884 glReadnPixels(static_cast<GLint>(sPos.x), static_cast<GLint>(sPos.y), static_cast<GLsizei>(sExt.x),
1885 static_cast<GLsizei>(sExt.y), iPlat.format, static_cast<GLenum>(iPlat.dataType),
1886 static_cast<GLsizei>(bPlat.alignedByteSize), reinterpret_cast<void*>(dstOffset));
1887 device_.BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
1888 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, 0, 0);
1889 }
1890
1891 void RenderBackendGLES::RenderCommandCopyBufferImage(const RenderCommandWithType& ref)
1892 {
1893 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER_IMAGE);
1894 const auto& renderCmd = *static_cast<const struct RenderCommandCopyBufferImage*>(ref.rc);
1895 PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1896 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1897 BufferToImageCopy(renderCmd);
1898 } else if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1899 ImageToBufferCopy(renderCmd);
1900 }
1901 }
1902
1903 void RenderBackendGLES::RenderCommandCopyImage(const RenderCommandWithType& ref)
1904 {
1905 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_IMAGE);
1906 const auto& renderCmd = *static_cast<const struct RenderCommandCopyImage*>(ref.rc);
1907 PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1908 const auto* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1909 const auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1910 if ((srcGpuImage == nullptr) || (dstGpuImage == nullptr)) {
1911 return;
1912 }
1913 const auto& srcDesc = srcGpuImage->GetDesc();
1914 const auto& dstDesc = dstGpuImage->GetDesc();
1915 #if RENDER_VALIDATION_ENABLED
1916 ValidateCopyImage(renderCmd.imageCopy, srcDesc, dstDesc);
1917 #endif
1918 const auto srcMipLevel =
1919 static_cast<GLint>(Math::min(renderCmd.imageCopy.srcSubresource.mipLevel, srcDesc.mipCount - 1));
1920 const auto dstMipLevel =
1921 static_cast<GLint>(Math::min(renderCmd.imageCopy.dstSubresource.mipLevel, dstDesc.mipCount - 1));
1922
1923 auto sOffset = renderCmd.imageCopy.srcOffset;
1924 auto dOffset = renderCmd.imageCopy.dstOffset;
1925 auto size = renderCmd.imageCopy.extent;
1926
1927 // clamp negative offsets to zero and adjust extent and other offset accordingly
1928 ClampOffset(sOffset, dOffset, size);
1929 ClampOffset(dOffset, sOffset, size);
1930
1931 // clamp size to fit src and dst
1932 ClampSize(sOffset, srcDesc, size);
1933 ClampSize(dOffset, dstDesc, size);
1934
1935 const auto& srcPlatData = srcGpuImage->GetPlatformData();
1936 const auto& dstPlatData = dstGpuImage->GetPlatformData();
1937 glCopyImageSubData(srcPlatData.image, srcPlatData.type, srcMipLevel, sOffset.x, sOffset.y, sOffset.z,
1938 dstPlatData.image, dstPlatData.type, dstMipLevel, dOffset.x, dOffset.y, dOffset.z,
1939 static_cast<GLsizei>(size.width), static_cast<GLsizei>(size.height), static_cast<GLsizei>(size.depth));
1940 }
1941
1942 void RenderBackendGLES::RenderCommandBarrierPoint(const RenderCommandWithType& ref)
1943 {
1944 PLUGIN_ASSERT(ref.type == RenderCommandType::BARRIER_POINT);
1945 const auto& renderCmd = *static_cast<const struct RenderCommandBarrierPoint*>(ref.rc);
1946 const auto& rbList = *managers_.rbList;
1947 // NOTE: proper flagging of barriers.
1948 const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1949 rbList.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1950 if (!barrierPointBarriers) {
1951 return; // early out
1952 }
1953 const uint32_t barrierListCount = barrierPointBarriers->barrierListCount;
1954 const auto* nextBarrierList = barrierPointBarriers->firstBarrierList;
1955 GLbitfield barriers = 0;
1956 GLbitfield barriersByRegion = 0;
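// Translate the engine's src/dst access masks into glMemoryBarrier / glMemoryBarrierByRegion bitfields.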
1957 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1958 if (nextBarrierList == nullptr) {
1959 // cannot be null, just a safety
1960 PLUGIN_ASSERT(false);
1961 return;
1962 }
1963 const auto& barrierListRef = *nextBarrierList;
1964 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1965 const uint32_t barrierCount = barrierListRef.count;
1966
1967 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1968 const auto& barrier = barrierListRef.commandBarriers[barrierIdx];
1969
1970 // check if written by a previous shader as an attachment or a storage image/buffer
1971 if (barrier.src.accessFlags & (CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
1972 CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
1973 const auto resourceHandle = barrier.resourceHandle;
1974 const auto handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1975
1976 // barrier by region is between fragment shaders and supports a subset of barriers.
1977 if ((barrier.src.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) &&
1978 (barrier.dst.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
1979 barriersByRegion |= CommonBarrierBits(barrier.dst.accessFlags, handleType);
1980 } else {
1981 // check the barriers shared with ByRegion
1982 barriers |= CommonBarrierBits(barrier.dst.accessFlags, handleType);
1983
1984 // the rest are invalid for ByRegion
1985 if (barrier.dst.accessFlags & CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) {
1986 barriers |= GL_COMMAND_BARRIER_BIT;
1987 }
1988 if (barrier.dst.accessFlags & CORE_ACCESS_INDEX_READ_BIT) {
1989 barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
1990 }
1991 if (barrier.dst.accessFlags & CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
1992 barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
1993 }
1994 // which are the correct accessFlags?
1995 // GL_PIXEL_BUFFER_BARRIER_BIT:
1996 // - buffer objects via the GL_PIXEL_PACK_BUFFER and GL_PIXEL_UNPACK_BUFFER bindings (via
1997 // glReadPixels, glTexSubImage1D, etc.)
1998 // GL_TEXTURE_UPDATE_BARRIER_BIT:
1999 // - texture via glTex(Sub)Image*, glCopyTex(Sub)Image*, glCompressedTex(Sub)Image*, and
2000 // reads via glGetTexImage
2001 // GL_BUFFER_UPDATE_BARRIER_BIT:
2002 // - glBufferSubData, glCopyBufferSubData, or glGetBufferSubData, or buffer object memory
2003 // mapped by glMapBuffer or glMapBufferRange
2004 // Do these two cover all memory access (CORE_ACCESS_MEMORY_READ_BIT,
2005 // CORE_ACCESS_MEMORY_WRITE_BIT)?
2006 if (barrier.dst.accessFlags & (CORE_ACCESS_TRANSFER_READ_BIT | CORE_ACCESS_TRANSFER_WRITE_BIT |
2007 CORE_ACCESS_HOST_READ_BIT | CORE_ACCESS_HOST_WRITE_BIT)) {
2008 if (handleType == RenderHandleType::GPU_IMAGE) {
2009 barriers |= GL_TEXTURE_UPDATE_BARRIER_BIT;
2010 } else if (handleType == RenderHandleType::GPU_BUFFER) {
2011 barriers |= GL_BUFFER_UPDATE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT;
2012 }
2013 }
2014 // GL_TRANSFORM_FEEDBACK_BARRIER_BIT is not used at the moment
2015 }
2016 }
2017 }
2018 }
2019 if (barriers) {
2020 glMemoryBarrier(barriers);
2021 }
2022 if (barriersByRegion) {
2023 // only for fragment-fragment
2024 glMemoryBarrierByRegion(barriersByRegion);
2025 }
2026 }
2027
2028 void RenderBackendGLES::UpdateGlobalDescriptorSets()
2029 {
2030 auto& descriptorSetMgr = static_cast<DescriptorSetManagerGles&>(device_.GetDescriptorSetManager());
2031
2032 // Update global descset if needed
2033 const auto& allDescSets = descriptorSetMgr.GetUpdateDescriptorSetHandles();
2034 if (allDescSets.empty()) {
2035 return;
2036 }
2037 for (const auto& descHandle : allDescSets) {
2038 if (RenderHandleUtil::GetHandleType(descHandle) != RenderHandleType::DESCRIPTOR_SET) {
2039 continue;
2040 }
2041 descriptorSetMgr.UpdateDescriptorSetGpuHandle(descHandle);
2042 }
2043 #if RENDER_HAS_GLES_BACKEND
2044 oesBindingsChanged_ = true;
2045 oesBinds_.clear();
2046 #endif
2047 }
2048
2049 void RenderBackendGLES::UpdateCommandListDescriptorSets(
2050 const RenderCommandList& renderCommandList, NodeContextDescriptorSetManager& ncdsm)
2051 {
2052 const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
2053 if (allDescSets.empty()) {
2054 return;
2055 }
2056 for (const auto& descHandle : allDescSets) {
2057 if (RenderHandleUtil::GetHandleType(descHandle) != RenderHandleType::DESCRIPTOR_SET) {
2058 continue;
2059 }
2060 ncdsm.UpdateDescriptorSetGpuHandle(descHandle);
2061 }
2062 #if RENDER_HAS_GLES_BACKEND
2063 oesBindingsChanged_ = true;
2064 oesBinds_.clear();
2065 #endif
2066 }
2067
2068 void RenderBackendGLES::RenderCommandBindDescriptorSets(const RenderCommandWithType& ref)
2069 {
2070 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_DESCRIPTOR_SETS);
2071 if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2072 return;
2073 }
2074 const auto& renderCmd = *static_cast<const struct RenderCommandBindDescriptorSets*>(ref.rc);
2075 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2076
2077 const auto lastSet = renderCmd.firstSet + renderCmd.setCount;
2078 if ((renderCmd.firstSet >= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) ||
2079 (lastSet > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT)) {
2080 return;
2081 }
2082 std::copy(renderCmd.descriptorSetHandles + renderCmd.firstSet, renderCmd.descriptorSetHandles + lastSet,
2083 descriptorSetHandles_ + renderCmd.firstSet);
2084 auto* dst = descriptorSetDynamicOffsets_ + renderCmd.firstSet;
2085 for (const auto &src : array_view(renderCmd.descriptorSetDynamicOffsets + renderCmd.firstSet,
2086 renderCmd.descriptorSetDynamicOffsets + lastSet)) {
2087 dst->dynamicOffsetCount = src.dynamicOffsetCount;
2088 std::copy(src.dynamicOffsets, src.dynamicOffsets + src.dynamicOffsetCount, dst->dynamicOffsets);
2089 ++dst;
2090 }
2091 firstSet_ = static_cast<uint16_t>(renderCmd.firstSet);
2092 setCount_ = static_cast<uint16_t>(renderCmd.setCount);
2093 descriptorUpdate_ = true;
2094
2095 #if RENDER_HAS_GLES_BACKEND
2096 oesBinds_.clear();
2097 oesBindingsChanged_ = true;
2098 const auto& ncdsm = *static_cast<NodeContextDescriptorSetManagerGles*>(managers_.descriptorSetMgr);
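// Scan the bound sets for image resources that require the external (OES) sampling path.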
2099 for (uint32_t set = firstSet_; set < static_cast<uint32_t>(firstSet_ + setCount_); ++set) {
2100 const auto& descHandle = descriptorSetHandles_[set];
2101 if (!ncdsm.HasPlatformConversionBindings(descHandle)) {
2102 continue;
2103 }
2104 const auto& resources = ncdsm.GetResources(descHandle);
2105 for (uint32_t binding = 0U, count = resources.size(); binding < count; ++binding) {
2106 auto& bind = resources[binding];
2107 if ((bind.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
2108 (bind.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
2109 (bind.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
2110 if (bind.resources[0].image.mode & Gles::EXTERNAL_BIT) {
2111 oesBinds_.push_back(OES_Bind { set, binding });
2112 }
2113 }
2114 }
2115 }
2116 #endif
2117 }
2118
2119 void RenderBackendGLES::SetPushConstant(uint32_t program, const Gles::PushConstantReflection& pc, const void* data)
2120 {
2121 const auto location = static_cast<GLint>(pc.location);
2122 // the consts list has been filtered and cleared of unused uniforms.
2123 PLUGIN_ASSERT(location != Gles::INVALID_LOCATION);
2124 GLint count = Math::max(static_cast<GLint>(pc.arraySize), 1);
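// Treat a zero arraySize as a single (non-array) uniform; glProgramUniform* expects a count of at least 1.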
2125 switch (pc.type) {
2126 case GL_UNSIGNED_INT: {
2127 glProgramUniform1uiv(program, location, count, static_cast<const GLuint*>(data));
2128 break;
2129 }
2130 case GL_FLOAT: {
2131 glProgramUniform1fv(program, location, count, static_cast<const GLfloat*>(data));
2132 break;
2133 }
2134 case GL_FLOAT_VEC2: {
2135 glProgramUniform2fv(program, location, count, static_cast<const GLfloat*>(data));
2136 break;
2137 }
2138 case GL_FLOAT_VEC4: {
2139 glProgramUniform4fv(program, location, count, static_cast<const GLfloat*>(data));
2140 break;
2141 }
2142 case GL_FLOAT_MAT4: {
2143 glProgramUniformMatrix4fv(program, location, count, false, static_cast<const GLfloat*>(data));
2144 break;
2145 }
2146 case GL_UNSIGNED_INT_VEC4: {
2147 glProgramUniform4uiv(program, location, count, static_cast<const GLuint*>(data));
2148 break;
2149 }
2150 default:
2151 PLUGIN_ASSERT_MSG(false, "Unhandled pushconstant variable type");
2152 }
2153 }
2154
2155 void RenderBackendGLES::SetPushConstants(uint32_t program, const array_view<Gles::PushConstantReflection>& consts)
2156 {
2157 if (boundProgram_.setPushConstants) {
2158 boundProgram_.setPushConstants = false;
2159 const auto& renderCmd = boundProgram_.pushConstants;
2160 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2161 PLUGIN_ASSERT_MSG(renderCmd.pushConstant.byteSize > 0, "PushConstant byteSize is zero!");
2162 PLUGIN_ASSERT_MSG(renderCmd.data, "PushConstant data is nullptr!");
2163 if ((renderCmd.data == nullptr) || (renderCmd.pushConstant.byteSize == 0))
2164 return;
2165 // ASSERT: expecting data is valid
2166 // NOTE: handle rest of the types
2167 for (const auto& pc : consts) {
2168 const size_t offs = pc.offset;
2169 if ((offs + pc.size) > renderCmd.pushConstant.byteSize) {
2170 PLUGIN_LOG_E(
2171 "pushConstant data invalid (data for %s is missing [offset:%zu size:%zu] byteSize of data:%u)",
2172 pc.name.c_str(), pc.offset, pc.size, renderCmd.pushConstant.byteSize);
2173 continue;
2174 }
2175 /*
2176 NOTE: handle the strides....
2177 consts[i].array_stride;
2178 consts[i].matrix_stride; */
2179 SetPushConstant(program, pc, &renderCmd.data[offs]);
2180 }
2181 }
2182 }
2183
2184 void RenderBackendGLES::RenderCommandPushConstant(const RenderCommandWithType& ref)
2185 {
2186 PLUGIN_ASSERT(ref.type == RenderCommandType::PUSH_CONSTANT);
2187 if (!boundComputeProgram_ && !boundShaderProgram_) {
2188 return;
2189 }
2190 const auto& renderCmd = *static_cast<const struct RenderCommandPushConstant*>(ref.rc);
2191 if (renderCmd.pushConstant.byteSize > 0) {
2192 PLUGIN_ASSERT(renderCmd.data);
2193 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2194 boundProgram_.setPushConstants = true;
2195 boundProgram_.pushConstants = renderCmd;
2196 if (boundComputeProgram_) {
2197 const auto& sd =
2198 static_cast<const GpuComputeProgramPlatformDataGL&>(boundComputeProgram_->GetPlatformData());
2199 SetPushConstants(sd.program, sd.pushConstants);
2200 } else {
2201 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(boundShaderProgram_->GetPlatformData());
2202 SetPushConstants(sd.program, sd.pushConstants);
2203 }
2204 }
2205 }
2206
2207 void RenderBackendGLES::RenderCommandClearColorImage(const RenderCommandWithType& ref)
2208 {
2209 PLUGIN_ASSERT(ref.type == RenderCommandType::CLEAR_COLOR_IMAGE);
2210 #if RENDER_HAS_GLES_BACKEND
2211 #if (RENDER_VALIDATION_ENABLED == 1)
2212 PLUGIN_LOG_ONCE_E("RenderBackendGLES::RenderCommandClearColorImage",
2213 "Render command clear color image not support with GLES. One should implement higher level path for "
2214 "clearing.");
2215 #endif
2216 #else
2217 const auto& renderCmd = *static_cast<const struct RenderCommandClearColorImage*>(ref.rc);
2218
2219 const GpuImageGLES* imagePtr = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.handle);
2220 if (imagePtr) {
2221 const GpuImagePlatformDataGL& platImage = imagePtr->GetPlatformData();
2222 // NOTE: mip levels and array layers should be handled separately
2223 for (const auto& subresRef : renderCmd.ranges) {
2224 glClearTexImage(platImage.image, // texture
2225 (int32_t)subresRef.baseMipLevel, // level
2226 platImage.format, // format
2227 platImage.dataType, // type
2228 &renderCmd.color); // data
2229 }
2230 }
2231 #endif
2232 }
2233
2234 // dynamic states
2235 void RenderBackendGLES::RenderCommandDynamicStateViewport(const RenderCommandWithType& ref)
2236 {
2237 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_VIEWPORT);
2238 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateViewport*>(ref.rc);
2239 const ViewportDesc& vd = renderCmd.viewportDesc;
2240 SetViewport(vd);
2241 }
2242
2243 void RenderBackendGLES::RenderCommandDynamicStateScissor(const RenderCommandWithType& ref)
2244 {
2245 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_SCISSOR);
2246 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateScissor*>(ref.rc);
2247 const ScissorDesc& sd = renderCmd.scissorDesc;
2248 SetScissor(sd);
2249 }
2250
2251 void RenderBackendGLES::RenderCommandDynamicStateLineWidth(const RenderCommandWithType& ref)
2252 {
2253 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_LINE_WIDTH);
2254 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateLineWidth*>(ref.rc);
2255 if (renderCmd.lineWidth != cacheState_.rasterizationState.lineWidth) {
2256 cacheState_.rasterizationState.lineWidth = renderCmd.lineWidth;
2257 glLineWidth(renderCmd.lineWidth);
2258 }
2259 }
2260
2261 void RenderBackendGLES::RenderCommandDynamicStateDepthBias(const RenderCommandWithType& ref)
2262 {
2263 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS);
2264 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBias not implemented");
2265 }
2266
2267 void RenderBackendGLES::RenderCommandDynamicStateBlendConstants(const RenderCommandWithType& ref)
2268 {
2269 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS);
2270 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateBlendConstants not implemented");
2271 }
2272
2273 void RenderBackendGLES::RenderCommandDynamicStateDepthBounds(const RenderCommandWithType& ref)
2274 {
2275 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS);
2276 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBounds not implemented");
2277 }
2278
2279 void RenderBackendGLES::SetStencilState(const uint32_t frontFlags, const GraphicsState::StencilOpState& front,
2280 const uint32_t backFlags, const GraphicsState::StencilOpState& back)
2281 {
2282 auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2283 auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2284 const uint32_t FUNCMASK =
2285 (StencilSetFlags::SETCOMPAREOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETREFERENCE);
2286 if (frontFlags & StencilSetFlags::SETWRITEMASK) {
2287 cFront.writeMask = front.writeMask;
2288 glStencilMaskSeparate(GL_FRONT, cFront.writeMask);
2289 }
2290 if (frontFlags & FUNCMASK) {
2291 SetStencilCompareOp(cFront, front);
2292 glStencilFuncSeparate(
2293 GL_FRONT, GetCompareOp(cFront.compareOp), static_cast<GLint>(cFront.reference), cFront.compareMask);
2294 }
2295 if (frontFlags & StencilSetFlags::SETOP) {
2296 SetStencilOp(cFront, front);
2297 glStencilOpSeparate(
2298 GL_FRONT, GetStencilOp(cFront.failOp), GetStencilOp(cFront.depthFailOp), GetStencilOp(cFront.passOp));
2299 }
2300 if (backFlags & StencilSetFlags::SETWRITEMASK) {
2301 cBack.writeMask = back.writeMask;
2302 glStencilMaskSeparate(GL_BACK, cBack.writeMask);
2303 }
2304 if (backFlags & FUNCMASK) {
2305 SetStencilCompareOp(cBack, back);
2306 glStencilFuncSeparate(
2307 GL_BACK, GetCompareOp(cBack.compareOp), static_cast<GLint>(cBack.reference), cBack.compareMask);
2308 }
2309 if (backFlags & StencilSetFlags::SETOP) {
2310 SetStencilOp(cBack, back);
2311 glStencilOpSeparate(
2312 GL_BACK, GetStencilOp(cBack.failOp), GetStencilOp(cBack.depthFailOp), GetStencilOp(cBack.passOp));
2313 }
2314 }
2315
2316 void RenderBackendGLES::RenderCommandDynamicStateStencil(const RenderCommandWithType& ref)
2317 {
2318 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_STENCIL);
2319 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateStencil*>(ref.rc);
2320 auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2321 auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2322 uint32_t setFront = 0;
2323 uint32_t setBack = 0;
2324 if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_FRONT_BIT) {
2325 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2326 if (renderCmd.mask != cFront.compareMask) {
2327 cFront.compareMask = renderCmd.mask;
2328 setFront |= StencilSetFlags::SETCOMPAREMASK;
2329 }
2330 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2331 if (renderCmd.mask != cFront.writeMask) {
2332 cFront.writeMask = renderCmd.mask;
2333 setFront |= StencilSetFlags::SETWRITEMASK;
2334 }
2335 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2336 if (renderCmd.mask != cFront.reference) {
2337 cFront.reference = renderCmd.mask;
2338 setFront |= StencilSetFlags::SETREFERENCE;
2339 }
2340 }
2341 }
2342 if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_BACK_BIT) {
2343 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2344 if (renderCmd.mask != cBack.compareMask) {
2345 cBack.compareMask = renderCmd.mask;
2346 setBack |= StencilSetFlags::SETCOMPAREMASK;
2347 }
2348 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2349 if (renderCmd.mask != cBack.writeMask) {
2350 cBack.writeMask = renderCmd.mask;
2351 setBack |= StencilSetFlags::SETWRITEMASK;
2352 }
2353 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2354 if (renderCmd.mask != cBack.reference) {
2355 cBack.reference = renderCmd.mask;
2356 setBack |= StencilSetFlags::SETREFERENCE;
2357 }
2358 }
2359 }
2360 SetStencilState(setFront, cFront, setBack, cBack);
2361 }
2362
2363 void RenderBackendGLES::RenderCommandFragmentShadingRate(const RenderCommandWithType& renderCmd)
2364 {
2365 #if (RENDER_VALIDATION_ENABLED == 1)
2366 PLUGIN_LOG_ONCE_I("gles_RenderCommandFragmentShadingRate",
2367 "RENDER_VALIDATION: Fragment shading rate not available with GL(ES) backend.");
2368 #endif
2369 }
2370
2371 void RenderBackendGLES::RenderCommandExecuteBackendFramePosition(const RenderCommandWithType& renderCmd)
2372 {
2373 PLUGIN_ASSERT_MSG(false, "RenderCommandExecuteBackendFramePosition not implemented");
2374 }
2375
2376 void RenderBackendGLES::RenderCommandWriteTimestamp(const RenderCommandWithType& renderCmd)
2377 {
2378 PLUGIN_ASSERT_MSG(false, "RenderCommandWriteTimestamp not implemented");
2379 }
2380
2381 void RenderBackendGLES::BindVertexInputs(
2382 const VertexInputDeclarationData& decldata, const array_view<const int32_t>& vertexInputs)
2383 {
2384 // update bindings for the VAO.
2385 // process the attribute descriptions to only bind the needed vertex buffers
2386 // NOTE: there may be extra bindings in decldata.bindingDescriptions,
2387 // but we only bind the ones needed by the shader
2388 const uint32_t minBinding = Math::min(vertexAttribBinds_, decldata.attributeDescriptionCount);
2389 for (uint32_t i = 0; i < minBinding; ++i) {
2390 const auto& attributeRef = decldata.attributeDescriptions[i];
2391 const uint32_t location = attributeRef.location;
2392 const uint32_t binding = attributeRef.binding;
2393 // NOTE: we need to bind all the buffers to the correct bindings.
2394 // shader optimized check (vertexInputs, some locations are not in use)
2395 if ((location != ~0u) && (binding != ~0u) && (vertexInputs[location] != Gles::INVALID_LOCATION)) {
2396 const auto& slot = vertexAttribBindSlots_[binding];
2397 const auto& bindingRef = decldata.bindingDescriptions[binding];
2398 PLUGIN_ASSERT(bindingRef.binding == binding);
2399 // buffer bound to slot, and it's used by the shader.
2400 device_.BindVertexBuffer(binding, slot.id, slot.offset, static_cast<intptr_t>(bindingRef.stride));
2401 /*
2402 core/vulkan
2403 bindingRef.vertexInputRate = CORE_VERTEX_INPUT_RATE_VERTEX (0) attribute index advances per vertex
2404 bindingRef.vertexInputRate = CORE_VERTEX_INPUT_RATE_INSTANCE (1) attribute index advances per instance
2405
2406 gl/gles
2407 If divisor is 0, the attributes using the buffer bound to bindingindex advance once per vertex.
2408 If divisor is >0, the attributes advance once per divisor instances of the set(s) of vertices being
2409 rendered.
2410
2411 so we can directly pass the inputRate as VertexBindingDivisor. (ie. advance once per instance)
2412 ie. enum happens to match and can simply cast.
2413 */
2414 static_assert(CORE_VERTEX_INPUT_RATE_VERTEX == 0 && CORE_VERTEX_INPUT_RATE_INSTANCE == 1);
2415 device_.VertexBindingDivisor(binding, static_cast<uint32_t>(bindingRef.vertexInputRate));
2416 }
2417 }
2418 }
2419
2420 void RenderBackendGLES::BindSampler(const array_view<const Gles::Bind::Resource> resources, const Binder& binder,
2421 const array_view<const Slice> descriptorIndex, const array_view<const uint8_t> ids)
2422 {
2423 const auto end = Math::min(resources.size(), descriptorIndex.size());
2424 for (uint32_t index = 0; index < end; ++index) {
2425 const auto& idRange = descriptorIndex[index];
2426 if ((size_t(idRange.index) + idRange.count) > ids.size()) {
2427 continue;
2428 }
2429 const auto samplerId = resources[index].sampler.samplerId;
2430 for (const auto& id : array_view(ids.data() + idRange.index, idRange.count)) {
2431 const auto textureUnit = index + id;
2432 #if (RENDER_PERF_ENABLED == 1)
2433 if (device_.BoundSampler(textureUnit) != samplerId) {
2434 ++perfCounters_.bindSampler;
2435 }
2436 #endif
2437 device_.BindSampler(textureUnit, samplerId);
2438 }
2439 }
2440 }
2441
2442 void RenderBackendGLES::BindTexture(array_view<const Gles::Bind::Resource> resources, const Binder& binder,
2443 BASE_NS::array_view<const Slice> descriptorIndex, BASE_NS::array_view<const uint8_t> ids,
2444 DescriptorType descriptorType)
2445 {
2446 const auto end = Math::min(resources.size(), descriptorIndex.size());
2447 for (uint32_t index = 0; index < end; ++index) {
2448 const auto& idRange = descriptorIndex[index];
2449 if ((size_t(idRange.index) + idRange.count) > ids.size()) {
2450 continue;
2451 }
2452 const auto& imgType = resources[index].image;
2453 if (!imgType.image) {
2454 continue;
2455 }
2456 auto& plat = imgType.image->GetPlatformData();
2457 for (const auto& id : array_view(ids.data() + idRange.index, idRange.count)) {
2458 const auto textureUnit = index + id;
2459 uint32_t samplerId = UINT32_MAX;
2460 if (descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2461 samplerId = resources[index].sampler.samplerId;
2462 } else if (descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
2463 samplerId = 0U;
2464 }
2465 if (samplerId != UINT32_MAX) {
2466 #if (RENDER_PERF_ENABLED == 1)
2467 if (device_.BoundSampler(textureUnit) != samplerId) {
2468 ++perfCounters_.bindSampler;
2469 }
2470 #endif
2471 device_.BindSampler(textureUnit, samplerId);
2472 }
2473 const auto baseLevel =
2474 (imgType.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? imgType.mipLevel : 0U;
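// Storage images are bound to image units; other image descriptors are bound as regular textures.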
2475 if (descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
2476 device_.BindImageTexture(
2477 textureUnit, plat.image, baseLevel, false, 0, imgType.mode & 0xFFFF, plat.internalFormat);
2478 } else {
2479 #if (RENDER_PERF_ENABLED == 1)
2480 if (device_.BoundTexture(textureUnit, plat.type) != plat.image) {
2481 ++perfCounters_.bindTexture;
2482 }
2483 #endif
2484 device_.BindTexture(textureUnit, plat.type, plat.image);
2485 // NOTE: the last setting wins; different mip levels cannot be bound from a single
2486 // resource.
2487 // Check and update (if needed) the forced miplevel.
2488 if (plat.mipLevel != imgType.mipLevel) {
2489 // NOTE: we are actually modifying the texture object bound above
2490 const_cast<GpuImagePlatformDataGL&>(plat).mipLevel = imgType.mipLevel;
2491 // either force the defined mip level or use defaults.
2492 const auto maxLevel = static_cast<GLint>(
2493 (plat.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? plat.mipLevel : 1000U);
2494 glTexParameteri(plat.type, GL_TEXTURE_BASE_LEVEL, static_cast<GLint>(baseLevel));
2495 glTexParameteri(plat.type, GL_TEXTURE_MAX_LEVEL, maxLevel);
2496 }
2497 }
2498 }
2499 }
2500 }
2501
2502 void RenderBackendGLES::BindBuffer(array_view<const Gles::Bind::Resource> resources, const Binder& binder,
2503 BASE_NS::array_view<const Slice> descriptorIndex, BASE_NS::array_view<const uint8_t> ids,
2504 DescriptorType descriptorType)
2505 {
2506 uint32_t target = 0U;
2507 if ((descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) ||
2508 (descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)) {
2509 target = GL_UNIFORM_BUFFER;
2510 } else if ((descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) ||
2511 (descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
2512 target = GL_SHADER_STORAGE_BUFFER;
2513 }
2514 const auto end = Math::min(resources.size(), descriptorIndex.size());
2515 for (uint32_t index = 0; index < end; index++) {
2516 const auto& idRange = descriptorIndex[index];
2517 if ((size_t(idRange.index) + idRange.count) > ids.size()) {
2518 continue;
2519 }
2520 const auto& obj = resources[index];
2521 uint32_t dynOffset = 0U;
2522 if (auto& currentOffsets = descriptorSetDynamicOffsets_[binder.set]; currentOffsets.dynamicOffsetCount) {
2523 auto& currentIndex = dynamicOffsetIndices_[binder.bind];
2524 if (currentIndex < currentOffsets.dynamicOffsetCount) {
2525 dynOffset = currentOffsets.dynamicOffsets[currentIndex];
2526 } else {
2527 PLUGIN_LOG_E("out of dynamic offsets");
2528 }
2529 }
2530
2531 for (const auto& id : array_view(ids.data() + idRange.index, idRange.count)) {
2532 const auto binding = index + id;
2533 #if (RENDER_PERF_ENABLED == 1)
2534 if (device_.BoundBuffer(target) != obj.buffer.bufferId) {
2535 ++perfCounters_.bindBuffer;
2536 }
2537 #endif
2538 device_.BindBufferRange(
2539 target, binding, obj.buffer.bufferId, obj.buffer.offset + dynOffset, obj.buffer.size);
2540 }
2541 }
2542 }
2543
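// Flushes the currently bound descriptor sets to GL state for the active compute or graphics program. Returns
// early if nothing changed since the last call; on GLES an OES-compatible program variant is selected first
// when the OES bindings changed.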
2544 void RenderBackendGLES::BindResources()
2545 {
2546 if (!descriptorUpdate_) {
2547 return;
2548 }
2549 descriptorUpdate_ = false;
2550 const ResourcesView* shaderBindings = nullptr;
2551 if (boundComputeProgram_) {
2552 shaderBindings =
2553 &static_cast<const GpuComputeProgramPlatformDataGL&>(boundComputeProgram_->GetPlatformData()).resourcesView;
2554 } else if (boundShaderProgram_) {
2555 #if RENDER_HAS_GLES_BACKEND
2556 if (oesBindingsChanged_) {
2557 oesBindingsChanged_ = false;
2558
2559 // ask for a compatible program from the boundGraphicsPipeline_
2560 auto shader = boundGraphicsPipeline_->GetOESProgram(oesBinds_);
2561 if (!shader) {
2562 return;
2563 }
2564 if (boundShaderProgram_ != shader) {
2565 boundShaderProgram_ = shader;
2566 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
2567 // Push constants and the "flipLocation" uniform (i.e. uniform state) should only be updated if changed...
2568 const uint32_t program = sd.program;
2569 #if (RENDER_PERF_ENABLED == 1)
2570 if (device_.BoundProgram() != program) {
2571 ++perfCounters_.bindProgram;
2572 }
2573 #endif
2574 device_.UseProgram(program);
2575 if (sd.flipLocation != Gles::INVALID_LOCATION) {
2576 const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
2577 glProgramUniform1fv(program, sd.flipLocation, 1, &flip);
2578 }
2579 }
2580 }
2581 #endif
2582 shaderBindings =
2583 &static_cast<const GpuShaderProgramPlatformDataGL&>(boundShaderProgram_->GetPlatformData()).resourcesView;
2584 }
2585 if (!shaderBindings) {
2586 return;
2587 }
2588
2589 const auto& ncdsm = *static_cast<NodeContextDescriptorSetManagerGles*>(managers_.descriptorSetMgr);
2590 uint32_t currentSet = UINT32_MAX;
2591 array_view<const Gles::Bind> descriptorSetResources;
2592 // With some bookkeeping it might be possible to go through only the descriptor sets bound by the previous
2593 // RenderCommandBindDescriptorSets instead of all sets.
2594 for (auto& binder : (shaderBindings->resourceList)) {
2595 // Binders are in set/binding order. When the set changes, fetch the resources for the current set and gather
2596 // the dynamic offsets.
2597 if (binder.set != currentSet) {
2598 currentSet = binder.set;
2599 descriptorSetResources = ncdsm.GetResources(descriptorSetHandles_[binder.set]);
2600
2601 // descriptorSetDynamicOffsets_ are only for dynamic buffers. Figure out which index should be used for
2602 // which binding.
2603 dynamicOffsetIndices_.resize(descriptorSetResources.size());
2604 uint32_t index = 0U;
2605 std::transform(descriptorSetResources.cbegin(), descriptorSetResources.cend(),
2606 dynamicOffsetIndices_.begin(), [&index](const Gles::Bind& bind) {
2607 if ((bind.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) ||
2608 (bind.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
2609 return index++;
2610 }
2611 return 0U;
2612 });
2613 }
2614 if (binder.bind >= descriptorSetResources.size()) {
2615 PLUGIN_LOG_W(
2616 "Desctiptor count mismatch pipeline %u, binding %zu", binder.bind, descriptorSetResources.size());
2617 continue;
2618 }
2619 auto& curRes = descriptorSetResources[binder.bind];
2620 #if RENDER_VALIDATION_ENABLED
2621 if (binder.descriptors.count != curRes.resources.size()) {
2622 PLUGIN_LOG_W(
2623 "Desctiptor size mismatch pipeline %u, binding %zu", binder.descriptors.count, curRes.resources.size());
2624 }
2625
2626 auto descriptorType = curRes.descriptorType;
2627 if (descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
2628 descriptorType = CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
2629 } else if (descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2630 descriptorType = CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2631 }
2632 if (binder.type != descriptorType) {
2633 PLUGIN_LOG_W("Descriptor TYPE mismatch pipeline %x, binding %x", binder.type, descriptorType);
2634 }
2635 #endif
2636 auto descriptorIndex =
2637 array_view(shaderBindings->descriptorIndexIds.data() + binder.descriptors.index, binder.descriptors.count);
2638 switch (curRes.descriptorType) {
2639 case CORE_DESCRIPTOR_TYPE_SAMPLER: {
2640 BindSampler(curRes.resources, binder, descriptorIndex, shaderBindings->ids);
2641 break;
2642 }
2643
2644 case CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2645 case CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2646 case CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2647 case CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
2648 BindTexture(curRes.resources, binder, descriptorIndex, shaderBindings->ids, curRes.descriptorType);
2649 break;
2650 }
2651 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2652 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2653 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2654 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
2655 BindBuffer(curRes.resources, binder, descriptorIndex, shaderBindings->ids, curRes.descriptorType);
2656 break;
2657 }
2658 case CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2659 case CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2660 case CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE:
2661 case CORE_DESCRIPTOR_TYPE_MAX_ENUM:
2662 break;
2663 }
2664 }
2665 }
2666
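// Debug markers map to GL debug groups (glPushDebugGroup / glPopDebugGroup) when RENDER_DEBUG_MARKERS_ENABLED is set.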
2667 void RenderBackendGLES::RenderCommandBeginDebugMarker(const RenderCommandWithType& ref)
2668 {
2669 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
2670 const auto& renderCmd = *static_cast<const struct RenderCommandBeginDebugMarker*>(ref.rc);
2671 glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)renderCmd.name.data());
2672 #endif
2673 }
2674
2675 void RenderBackendGLES::RenderCommandEndDebugMarker(const RenderCommandWithType&)
2676 {
2677 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
2678 glPopDebugGroup();
2679 #endif
2680 }
2681
2682 #if (RENDER_PERF_ENABLED == 1)
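// Creates per render-node timer entries (and GPU timestamp queries when enabled) for every render command
// context and resets the per-frame counters.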
2683 void RenderBackendGLES::StartFrameTimers(const RenderCommandFrameData& renderCommandFrameData)
2684 {
2685 framePerfCounters_ = {};
2686 for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2687 const string_view& debugName = renderCommandContext.debugName;
2688 if (timers_.count(debugName) == 0) { // new timers
2689 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2690 PerfDataSet& perfDataSet = timers_[debugName];
2691 constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2692 perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryGLES(device_, desc));
2693 perfDataSet.counter = 0u;
2694 #else
2695 timers_.insert({ debugName, {} });
2696 #endif
2697 }
2698 }
2699 }
2700
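// Reports the accumulated CPU / GPU timings and per-frame counters to the performance data manager and the
// profiler plots.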
2701 void RenderBackendGLES::EndFrameTimers()
2702 {
2703 int64_t fullGpuTime = 0;
2704 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2705 // already in microseconds
2706 fullGpuTime = fullGpuCounter_;
2707 fullGpuCounter_ = 0;
2708 #endif
2709 if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2710 CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2711 globalPerfData) {
2712 CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RENDER");
2713 perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2714 perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2715 perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2716 perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2717 perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2718 perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2719
2720 CORE_PROFILER_PLOT("Full_Cpu", static_cast<int64_t>(commonCpuTimers_.full.GetMicroseconds()));
2721 CORE_PROFILER_PLOT("Acquire_Cpu", static_cast<int64_t>(commonCpuTimers_.acquire.GetMicroseconds()));
2722 CORE_PROFILER_PLOT("Execute_Cpu", static_cast<int64_t>(commonCpuTimers_.execute.GetMicroseconds()));
2723 CORE_PROFILER_PLOT("Submit_Cpu", static_cast<int64_t>(commonCpuTimers_.submit.GetMicroseconds()));
2724 CORE_PROFILER_PLOT("Present_Cpu", static_cast<int64_t>(commonCpuTimers_.present.GetMicroseconds()));
2725 CORE_PROFILER_PLOT("Full_Gpu", static_cast<int64_t>(fullGpuTime));
2726 }
2727
2728 CORE_PROFILER_PLOT("Instance count", static_cast<int64_t>(framePerfCounters_.instanceCount));
2729 CORE_PROFILER_PLOT("Triangle count", static_cast<int64_t>(framePerfCounters_.triangleCount));
2730 CORE_PROFILER_PLOT("Draw count", static_cast<int64_t>(framePerfCounters_.drawCount));
2731 CORE_PROFILER_PLOT("Draw Indirect count", static_cast<int64_t>(framePerfCounters_.drawIndirectCount));
2732 CORE_PROFILER_PLOT("Dispatch count", static_cast<int64_t>(framePerfCounters_.dispatchCount));
2733 CORE_PROFILER_PLOT("Dispatch Indirect count", static_cast<int64_t>(framePerfCounters_.dispatchIndirectCount));
2734 CORE_PROFILER_PLOT("RenderPass count", static_cast<int64_t>(framePerfCounters_.renderPassCount));
2735 CORE_PROFILER_PLOT("Bind program count", static_cast<int64_t>(framePerfCounters_.bindProgram));
2736 CORE_PROFILER_PLOT("Bind sampler count", static_cast<int64_t>(framePerfCounters_.bindSampler));
2737 CORE_PROFILER_PLOT("Bind texture count", static_cast<int64_t>(framePerfCounters_.bindTexture));
2738 CORE_PROFILER_PLOT("Bind buffer count", static_cast<int64_t>(framePerfCounters_.bindBuffer));
2739 }
2740
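// Reads back the GPU timestamp query for one render node (once enough frames have been buffered and no disjoint
// event occurred) and reports the per-node CPU / GPU times and counters.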
2741 void RenderBackendGLES::CopyPerfTimeStamp(const string_view name, PerfDataSet& perfDataSet)
2742 {
2743 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2744 int64_t gpuMicroSeconds = 0;
2745 if (validGpuQueries_) {
2746 GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
2747 PLUGIN_ASSERT(gpuQuery);
2748
2749 gpuQuery->NextQueryIndex();
2750
2751 const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
2752 PLUGIN_ASSERT(platData.queryObject);
2753
2754 GLint disjointOccurred = 0;
2755 #ifdef GL_GPU_DISJOINT_EXT
2756 // Clear disjoint error
2757 glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
2758 #endif
2759 if (!disjointOccurred && (++perfDataSet.counter) > device_.GetCommandBufferingCount()) {
2760 GLuint64 gpuNanoSeconds = 0U;
2761 #ifdef GL_GPU_DISJOINT_EXT
2762 glGetQueryObjectui64vEXT(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2763 #else
2764 glGetQueryObjectui64v(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2765 #endif
2766 constexpr uint64_t NANOSECONDS_TO_MICROSECONDS = 1000;
2767 gpuMicroSeconds = static_cast<int64_t>(gpuNanoSeconds / NANOSECONDS_TO_MICROSECONDS);
2768 if (gpuMicroSeconds > UINT32_MAX) {
2769 gpuMicroSeconds = 0;
2770 }
2771 fullGpuCounter_ += gpuMicroSeconds;
2772 } else if (disjointOccurred) {
2773 PLUGIN_LOG_V("GL_GPU_DISJOINT_EXT disjoint occurred.");
2774 }
2775 }
2776 #endif
2777 const int64_t cpuMicroSeconds = perfDataSet.cpuTimer.GetMicroseconds();
2778
2779 if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2780 CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2781 globalPerfData) {
2782 CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2783
2784 perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2785 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2786 perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2787 #endif
2788 perfData->UpdateData(name, "Backend_Count_Triangle", perfCounters_.triangleCount,
2789 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2790 perfData->UpdateData(name, "Backend_Count_InstanceCount", perfCounters_.instanceCount,
2791 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2792 perfData->UpdateData(name, "Backend_Count_Draw", perfCounters_.drawCount,
2793 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2794 perfData->UpdateData(name, "Backend_Count_DrawIndirect", perfCounters_.drawIndirectCount,
2795 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2796 perfData->UpdateData(name, "Backend_Count_Dispatch", perfCounters_.dispatchCount,
2797 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2798 perfData->UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters_.dispatchIndirectCount,
2799 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2800 perfData->UpdateData(name, "Backend_Count_RenderPass", perfCounters_.renderPassCount,
2801 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2802 perfData->UpdateData(name, "Backend_Count_BindProgram", perfCounters_.bindProgram,
2803 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2804 perfData->UpdateData(name, "Backend_Count_BindSampler", perfCounters_.bindSampler,
2805 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2806 perfData->UpdateData(name, "Backend_Count_BindTexture", perfCounters_.bindTexture,
2807 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2808 perfData->UpdateData(name, "Backend_Count_BindBuffer", perfCounters_.bindBuffer,
2809 CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2810 framePerfCounters_.drawCount += perfCounters_.drawCount;
2811 framePerfCounters_.drawIndirectCount += perfCounters_.drawIndirectCount;
2812 framePerfCounters_.dispatchCount += perfCounters_.dispatchCount;
2813 framePerfCounters_.dispatchIndirectCount += perfCounters_.dispatchIndirectCount;
2814 framePerfCounters_.renderPassCount += perfCounters_.renderPassCount;
2815 framePerfCounters_.bindProgram += perfCounters_.bindProgram;
2816 framePerfCounters_.bindSampler += perfCounters_.bindSampler;
2817 framePerfCounters_.bindTexture += perfCounters_.bindTexture;
2818 framePerfCounters_.bindBuffer += perfCounters_.bindBuffer;
2819 framePerfCounters_.triangleCount += perfCounters_.triangleCount;
2820 framePerfCounters_.instanceCount += perfCounters_.instanceCount;
2821 }
2822 }
2823 #endif
2824
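// Forces the full depth / stencil state of the given graphics state into GL and into the cache.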
2825 void RenderBackendGLES::PrimeDepthStencilState(const GraphicsState& graphicsState)
2826 {
2827 auto& cDepth = cacheState_.depthStencilState;
2828 cDepth = graphicsState.depthStencilState;
2829 // CORE_DYNAMIC_STATE_DEPTH_BOUNDS NOT SUPPORTED ON GLES. (and not implemented on GL either)
2830 SetState(GL_DEPTH_TEST, cDepth.enableDepthTest);
2831 SetState(GL_STENCIL_TEST, cDepth.enableStencilTest);
2832 glDepthFunc(GetCompareOp(cDepth.depthCompareOp));
2833 glDepthMask((cDepth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE)));
2834 const uint32_t updateAllFlags =
2835 (StencilSetFlags::SETOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETCOMPAREOP |
2836 StencilSetFlags::SETREFERENCE | StencilSetFlags::SETWRITEMASK);
2837 SetStencilState(updateAllFlags, cDepth.frontStencilOpState, updateAllFlags, cDepth.backStencilOpState);
2838 }
2839
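// Forces the full color blend state (blend constants, write masks, blend funcs and equations) into GL and into
// the cache.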
2840 void RenderBackendGLES::PrimeBlendState(const GraphicsState& graphicsState)
2841 {
2842 auto& cBlend = cacheState_.colorBlendState;
2843 cBlend = graphicsState.colorBlendState;
2844 glBlendColor(cBlend.colorBlendConstants[Gles::RED_INDEX], cBlend.colorBlendConstants[Gles::GREEN_INDEX],
2845 cBlend.colorBlendConstants[Gles::BLUE_INDEX], cBlend.colorBlendConstants[Gles::ALPHA_INDEX]);
2846 GLuint maxColorAttachments = 0U;
2847 glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, (GLint*)&maxColorAttachments);
2848 maxColorAttachments = BASE_NS::Math::min(PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT, maxColorAttachments);
2849 for (GLuint i = 0; i < maxColorAttachments; i++) {
2850 const auto& cBlendState = cBlend.colorAttachments[i];
2851 glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2852 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2853 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2854 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2855 if (cBlendState.enableBlend) {
2856 glEnablei(GL_BLEND, i);
2857 } else {
2858 glDisablei(GL_BLEND, i);
2859 }
2860 glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2861 GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2862 GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2863 glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2864 }
2865 // logicops are unsupported on GLES
2866 }
2867
2868 void RenderBackendGLES::PrimeCache(const GraphicsState& graphicsState) // Forces the graphics state.
2869 {
2870 if (cachePrimed_) {
2871 return;
2872 }
2873 cachePrimed_ = true;
2874 /// GRAPHICSSTATE inputAssembly
2875 const auto& ia = graphicsState.inputAssembly;
2876 auto& cia = cacheState_.inputAssembly;
2877 cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
2878 SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
2879 topology_ = ia.primitiveTopology;
2880 /// GRAPHICSSTATE rasterizationState
2881 const auto& rs = graphicsState.rasterizationState;
2882 auto& crs = cacheState_.rasterizationState;
2883 // Save the polygon mode, since non-fill modes need to be handled manually (lines may need shader help...).
2884 polygonMode_ = rs.polygonMode;
2885 // GL_DEPTH_CLAMP (rs.enableDepthClamp) NOT SUPPORTED; CHECK GLES 3.2
2886 crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2887 SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2888 crs.enableDepthBias = rs.enableDepthBias;
2889 SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2890 crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2891 crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
2892 glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
2893 // depthBiasClamp NOT SUPPORTED! CHECK GLES 3.2
2894 // If cull mode Flags change...
2895 crs.cullModeFlags = rs.cullModeFlags;
2896 SetCullMode(crs);
2897 crs.frontFace = rs.frontFace;
2898 SetFrontFace(crs);
2899 crs.lineWidth = rs.lineWidth;
2900 glLineWidth(rs.lineWidth);
2901 PrimeDepthStencilState(graphicsState);
2902 PrimeBlendState(graphicsState);
2903 }
2904
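// Applies only the depth state fields that differ from the cached state.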
2905 void RenderBackendGLES::UpdateDepthState(const GraphicsState& graphicsState)
2906 {
2907 const auto& depth = graphicsState.depthStencilState;
2908 auto& cDepth = cacheState_.depthStencilState;
2909 if (depth.enableDepthTest != cDepth.enableDepthTest) {
2910 cDepth.enableDepthTest = depth.enableDepthTest;
2911 SetState(GL_DEPTH_TEST, depth.enableDepthTest);
2912 }
2913 if (depth.depthCompareOp != cDepth.depthCompareOp) {
2914 cDepth.depthCompareOp = depth.depthCompareOp;
2915 glDepthFunc(GetCompareOp(depth.depthCompareOp));
2916 }
2917 if (depth.enableDepthWrite != cDepth.enableDepthWrite) {
2918 cDepth.enableDepthWrite = depth.enableDepthWrite;
2919 glDepthMask((depth.enableDepthWrite == GL_TRUE));
2920 }
2921 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BOUNDS)) {
2922 // CORE_DYNAMIC_STATE_DEPTH_BOUNDS not supported on GLES.
2923 }
2924 }
2925
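// Applies only the stencil state fields that differ from the cached state, skipping fields that are handled as
// dynamic state.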
2926 void RenderBackendGLES::UpdateStencilState(const GraphicsState& graphicsState)
2927 {
2928 const auto& depth = graphicsState.depthStencilState;
2929 auto& cDepth = cacheState_.depthStencilState;
2930 if (depth.enableStencilTest != cDepth.enableStencilTest) {
2931 cDepth.enableStencilTest = depth.enableStencilTest;
2932 SetState(GL_STENCIL_TEST, depth.enableStencilTest);
2933 }
2934 uint32_t setFront = 0;
2935 uint32_t setBack = 0;
2936 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_REFERENCE)) {
2937 if (cDepth.frontStencilOpState.reference != depth.frontStencilOpState.reference) {
2938 setFront |= StencilSetFlags::SETREFERENCE;
2939 }
2940 if (cDepth.backStencilOpState.reference != depth.backStencilOpState.reference) {
2941 setBack |= StencilSetFlags::SETREFERENCE;
2942 }
2943 }
2944 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
2945 if (cDepth.frontStencilOpState.compareMask != depth.frontStencilOpState.compareMask) {
2946 setFront |= StencilSetFlags::SETCOMPAREMASK;
2947 }
2948 if (cDepth.backStencilOpState.compareMask != depth.backStencilOpState.compareMask) {
2949 setBack |= StencilSetFlags::SETCOMPAREMASK;
2950 }
2951 }
2952 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
2953 if (cDepth.frontStencilOpState.writeMask != depth.frontStencilOpState.writeMask) {
2954 setFront |= StencilSetFlags::SETWRITEMASK;
2955 }
2956 if (cDepth.backStencilOpState.writeMask != depth.backStencilOpState.writeMask) {
2957 setBack |= StencilSetFlags::SETWRITEMASK;
2958 }
2959 }
2960 if (cDepth.frontStencilOpState.compareOp != depth.frontStencilOpState.compareOp) {
2961 setFront |= StencilSetFlags::SETCOMPAREOP;
2962 }
2963 if (cDepth.backStencilOpState.compareOp != depth.backStencilOpState.compareOp) {
2964 setBack |= StencilSetFlags::SETCOMPAREOP;
2965 }
2966 if (!CompareStencilOp(cDepth.frontStencilOpState, depth.frontStencilOpState)) {
2967 setFront |= StencilSetFlags::SETOP;
2968 }
2969 if (!CompareStencilOp(cDepth.backStencilOpState, depth.backStencilOpState)) {
2970 setBack |= StencilSetFlags::SETOP;
2971 }
2972 SetStencilState(setFront, depth.frontStencilOpState, setBack, depth.backStencilOpState);
2973 }
2974
2975 void RenderBackendGLES::UpdateDepthStencilState(const GraphicsState& graphicsState)
2976 {
2977 UpdateDepthState(graphicsState);
2978 UpdateStencilState(graphicsState);
2979 }
2980
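// Applies the per-attachment blend state and the blend constants, skipping attachments whose state matches the
// cache and the constants when they are set as dynamic state.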
2981 void RenderBackendGLES::UpdateBlendState(const GraphicsState& graphicsState)
2982 {
2983 const auto& blend = graphicsState.colorBlendState;
2984 auto& cBlend = cacheState_.colorBlendState;
2985 for (GLuint i = 0; i < blend.colorAttachmentCount; i++) {
2986 const auto& blendState = blend.colorAttachments[i];
2987 auto& cBlendState = cBlend.colorAttachments[i];
2988 if (blendState.colorWriteMask != cBlendState.colorWriteMask) {
2989 cBlendState.colorWriteMask = blendState.colorWriteMask;
2990 glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2991 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2992 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2993 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2994 }
2995
2996 // Check if blend state has changed
2997 bool factorsChanged = false;
2998 bool opsChanged = false;
2999
3000 if (blendState.enableBlend) {
3001 factorsChanged = !CompareBlendFactors(cBlendState, blendState);
3002 opsChanged = !CompareBlendOps(cBlendState, blendState);
3003 }
3004
3005 if (blendState.enableBlend == cBlendState.enableBlend && !factorsChanged && !opsChanged) {
3006 continue;
3007 }
3008 cBlendState.enableBlend = blendState.enableBlend;
3009 if (blendState.enableBlend) {
3010 glEnablei(GL_BLEND, i);
3011 if (factorsChanged) {
3012 SetBlendFactors(cBlendState, blendState);
3013 glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
3014 GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
3015 GetBlendFactor(cBlendState.dstAlphaBlendFactor));
3016 }
3017 if (opsChanged) {
3018 SetBlendOps(cBlendState, blendState);
3019 glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
3020 }
3021 } else {
3022 glDisablei(GL_BLEND, i);
3023 }
3024 }
3025 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_BLEND_CONSTANTS)) {
3026 if (!Compare(cBlend.colorBlendConstants, blend.colorBlendConstants)) {
3027 Set(cBlend.colorBlendConstants, blend.colorBlendConstants);
3028 glBlendColor(blend.colorBlendConstants[Gles::RED_INDEX], blend.colorBlendConstants[Gles::GREEN_INDEX],
3029 blend.colorBlendConstants[Gles::BLUE_INDEX], blend.colorBlendConstants[Gles::ALPHA_INDEX]);
3030 }
3031 }
3032 // logicOps in blend not supported on GLES
3033 }
3034
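// Applies the rasterization state, skipping fields handled as dynamic state and flipping the front face when
// not rendering to the default fbo.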
3035 void RenderBackendGLES::UpdateRasterizationState(const GraphicsState& graphicsState)
3036 {
3037 const auto& rs = graphicsState.rasterizationState;
3038 auto& crs = cacheState_.rasterizationState;
3039 // Save the polygon mode, since non-fill modes need to be handled manually (lines may need shader help...).
3040 polygonMode_ = rs.polygonMode;
3041 #if RENDER_HAS_GL_BACKEND
3042 if (rs.polygonMode != crs.polygonMode) {
3043 crs.polygonMode = rs.polygonMode;
3044 SetPolygonMode(rs);
3045 }
3046 #endif
3047 if (rs.enableDepthClamp != crs.enableDepthClamp) {
3048 crs.enableDepthClamp = rs.enableDepthClamp;
3049 // NOT SUPPORTED (needs an extension)
3050 }
3051 if (rs.enableRasterizerDiscard != crs.enableRasterizerDiscard) {
3052 crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
3053 SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
3054 }
3055 if (rs.enableDepthBias != crs.enableDepthBias) {
3056 crs.enableDepthBias = rs.enableDepthBias;
3057 SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
3058 }
3059 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BIAS)) {
3060 if ((rs.depthBiasConstantFactor != crs.depthBiasConstantFactor) ||
3061 (rs.depthBiasSlopeFactor != crs.depthBiasSlopeFactor)) {
3062 crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
3063 crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
3064 glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
3065 }
3066 // depthBiasClamp NOT SUPPORTED (needs an extension)
3067 }
3068 // If cull mode Flags change...
3069 if (rs.cullModeFlags != crs.cullModeFlags) {
3070 crs.cullModeFlags = rs.cullModeFlags;
3071 SetCullMode(crs);
3072 }
3073 auto frontFace = rs.frontFace;
3074 if (!renderingToDefaultFbo_) {
3075 // Flip winding when not rendering to the default fbo.
3076 if (frontFace == FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE) {
3077 frontFace = FrontFace::CORE_FRONT_FACE_CLOCKWISE;
3078 } else if (frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE) {
3079 frontFace = FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE;
3080 }
3081 }
3082 if (frontFace != crs.frontFace) {
3083 crs.frontFace = frontFace;
3084 SetFrontFace(crs);
3085 }
3086 if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_LINE_WIDTH)) {
3087 if (rs.lineWidth != crs.lineWidth) {
3088 crs.lineWidth = rs.lineWidth;
3089 glLineWidth(rs.lineWidth);
3090 }
3091 }
3092 }
3093
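// Applies a complete graphics state on top of the cached state: input assembly, rasterization, depth / stencil
// and blend.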
3094 void RenderBackendGLES::DoGraphicsState(const GraphicsState& graphicsState)
3095 {
3096 /// GRAPHICSSTATE inputAssembly
3097 const auto& ia = graphicsState.inputAssembly;
3098 if (ia.enablePrimitiveRestart != cacheState_.inputAssembly.enablePrimitiveRestart) {
3099 auto& cia = cacheState_.inputAssembly;
3100 cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
3101 SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
3102 }
3103 topology_ = ia.primitiveTopology;
3104 UpdateRasterizationState(graphicsState);
3105 UpdateDepthStencilState(graphicsState);
3106 UpdateBlendState(graphicsState);
3107 }
3108
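// Caches the viewport and converts the top-left origin rect to GL's bottom-left origin when rendering to the
// default fbo; the conversion is deferred when the framebuffer height is not yet known.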
3109 void RenderBackendGLES::SetViewport(const ViewportDesc& vd)
3110 {
3111 const bool viewportPrimed = BASE_NS::exchange(viewportPrimed_, true);
3112 const bool updateV = (!viewportPrimed) || ((vd.x != viewport_.x) || (vd.y != viewport_.y) ||
3113 (vd.width != viewport_.width) || (vd.height != viewport_.height));
3114 const bool updateD =
3115 (!viewportPrimed) || ((vd.minDepth != viewport_.minDepth) || (vd.maxDepth != viewport_.maxDepth));
3116
3117 if (updateV) {
3118 viewport_.x = vd.x;
3119 viewport_.y = vd.y;
3120 viewport_.width = vd.width;
3121 viewport_.height = vd.height;
3122 // Handle top-left / bottom-left origin conversion
3123 auto y = static_cast<GLint>(vd.y);
3124 const auto h = static_cast<GLsizei>(vd.height);
3125 if (renderingToDefaultFbo_) {
3126 if (currentFrameBuffer_) {
3127 const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
3128 y = fh - (y + h);
3129 glViewport(static_cast<GLint>(vd.x), y, static_cast<GLsizei>(vd.width), h);
3130 } else {
3131 viewportPending_ = true;
3132 }
3133 } else {
3134 glViewport(static_cast<GLint>(vd.x), y, static_cast<GLsizei>(vd.width), h);
3135 }
3136 }
3137 if (updateD) {
3138 viewport_.minDepth = vd.minDepth;
3139 viewport_.maxDepth = vd.maxDepth;
3140 glDepthRangef(vd.minDepth, vd.maxDepth);
3141 }
3142 }
3143
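// Caches the scissor box and converts the top-left origin rect to GL's bottom-left origin when rendering to the
// default fbo.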
3144 void RenderBackendGLES::SetScissor(const ScissorDesc& sd)
3145 {
3146 // NOTE: ScissorDesc is in floats?!?
3147 const bool scissorPrimed = BASE_NS::exchange(scissorPrimed_, true);
3148 const bool updateS =
3149 (!scissorPrimed) ||
3150 ((sd.offsetX != scissorBox_.offsetX) || (sd.offsetY != scissorBox_.offsetY) ||
3151 (sd.extentWidth != scissorBox_.extentWidth) || (sd.extentHeight != scissorBox_.extentHeight));
3152 if (updateS) {
3153 scissorBox_ = sd;
3154 // Handle top-left / bottom-left origin conversion
3155 auto y = static_cast<GLint>(sd.offsetY);
3156 const auto h = static_cast<GLsizei>(sd.extentHeight);
3157 if (renderingToDefaultFbo_) {
3158 const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
3159 y = fh - (y + h);
3160 }
3161 glScissor(static_cast<GLint>(sd.offsetX), y, static_cast<GLsizei>(sd.extentWidth), h);
3162 }
3163 }
3164 RENDER_END_NAMESPACE()
3165