/*
 * Copyright (c) 2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_backend_gles.h"

#include <algorithm>

#include <base/containers/fixed_string.h>
#include <render/datastore/render_data_store_render_pods.h> // NodeGraphBackbufferConfiguration...
#include <render/namespace.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/perf/cpu_perf_scope.h>
#include <core/perf/intf_performance_data_manager.h>

#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#endif
#include "device/gpu_resource_manager.h"
#include "gles/device_gles.h"
#include "gles/gl_functions.h"
#include "gles/gpu_buffer_gles.h"
#include "gles/gpu_image_gles.h"
#include "gles/gpu_program_gles.h"
#include "gles/gpu_query_gles.h"
#include "gles/gpu_sampler_gles.h"
#include "gles/gpu_semaphore_gles.h"
#include "gles/node_context_descriptor_set_manager_gles.h"
#include "gles/node_context_pool_manager_gles.h"
#include "gles/pipeline_state_object_gles.h"
#include "gles/render_frame_sync_gles.h"
#include "gles/swapchain_gles.h"
#include "nodecontext/pipeline_descriptor_set_binder.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h" // RenderCommandFrameData
#include "util/log.h"
#include "util/render_frame_util.h"

#define IS_BIT(value, bit) ((((value) & (bit)) == (bit)) ? true : false)
#define IS_BIT_GL(value, bit) ((((value) & (bit)) == (bit)) ? (GLboolean)GL_TRUE : (GLboolean)GL_FALSE)

using namespace BASE_NS;

// NOTE: implement missing commands, add state caching and cleanup a bit more.
RENDER_BEGIN_NAMESPACE()
namespace Gles {
// Indices to colorBlendConstants
static constexpr uint32_t RED_INDEX = 0;
static constexpr uint32_t GREEN_INDEX = 1;
static constexpr uint32_t BLUE_INDEX = 2;
static constexpr uint32_t ALPHA_INDEX = 3;
static constexpr uint32_t CUBEMAP_LAYERS = 6;
} // namespace Gles

namespace {
constexpr GLenum LAYER_ID[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
    GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
    GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };

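// Maps an array layer index (0..5) to the matching GL cube map face enum via LAYER_ID; asserts and
// returns GL_NONE for anything that is not a cube map.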
GLenum GetCubeMapTarget(GLenum type, uint32_t layer)
{
    if (type == GL_TEXTURE_CUBE_MAP) {
        PLUGIN_ASSERT_MSG(layer < Gles::CUBEMAP_LAYERS, "Invalid cubemap index %u", layer);
        return LAYER_ID[layer];
    }
    PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
    return GL_NONE;
}

GLenum GetTarget(GLenum type, uint32_t layer, uint32_t sampleCount)
{
    if (type == GL_TEXTURE_2D) {
        if (sampleCount > 1) {
            return GL_TEXTURE_2D_MULTISAMPLE;
        }
        return GL_TEXTURE_2D;
    }
    if (type == GL_TEXTURE_CUBE_MAP) {
        PLUGIN_ASSERT_MSG(sampleCount == 1, "Cubemap texture can't have MSAA");
        return GetCubeMapTarget(type, layer);
    }
    PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
    return GL_NONE;
}

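// Arguments for DoBlit below: the width components of rect0/rect1 provide the x corners of the blit
// rectangle, while the height components together with the full mip 0 image height drive the
// top-left to bottom-left y-origin conversion.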
struct BlitArgs {
    uint32_t mipLevel {};
    Size3D rect0 {};
    Size3D rect1 {};
    uint32_t height {};
};

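// Worked y-flip example (illustrative values): height = 256 and mipLevel = 1 give an effective
// framebuffer height sfh = 128; with rect0.height = 10 and rect1.height = 20 the flipped start
// becomes sy = 128 - (10 + 20) = 98, measured from the bottom-left origin.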
void DoBlit(const Filter filter, const BlitArgs& src, const BlitArgs& dst)
{
    // Handle top-left / bottom-left origin conversion
    auto sy = static_cast<GLint>(src.rect0.height);
    const auto sh = static_cast<const GLint>(src.rect1.height);
    const auto sfh = static_cast<GLint>(src.height >> src.mipLevel);
    sy = sfh - (sy + sh);
    auto dy = static_cast<GLint>(dst.rect0.height);
    const auto dh = static_cast<const GLint>(dst.rect1.height);
    const auto dfh = static_cast<GLint>(dst.height >> dst.mipLevel);
    dy = dfh - (dy + dh);
    GLenum glfilter = GL_NEAREST;
    if (filter == CORE_FILTER_NEAREST) {
        glfilter = GL_NEAREST;
    } else if (filter == CORE_FILTER_LINEAR) {
        glfilter = GL_LINEAR;
    } else {
        PLUGIN_ASSERT_MSG(false, "RenderCommandBlitImage Invalid filter mode");
    }
    glBlitFramebuffer(static_cast<GLint>(src.rect0.width), sy, static_cast<GLint>(src.rect1.width), sfh,
        static_cast<GLint>(dst.rect0.width), dy, static_cast<GLint>(dst.rect1.width), dfh, GL_COLOR_BUFFER_BIT,
        glfilter);
}

GLenum GetPrimFromTopology(PrimitiveTopology op)
{
    switch (op) {
        case CORE_PRIMITIVE_TOPOLOGY_POINT_LIST:
            return GL_POINTS;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST:
            return GL_LINES;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP:
            return GL_LINE_STRIP;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
            return GL_TRIANGLES;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
            return GL_TRIANGLE_STRIP;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
            return GL_TRIANGLE_FAN;
#if defined(GL_ES_VERSION_3_2) || defined(GL_VERSION_3_2)
            // The following are only valid from GL(ES) 3.2 onwards.
        case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
            return GL_LINES_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
            return GL_LINE_STRIP_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
            return GL_TRIANGLES_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
            return GL_TRIANGLE_STRIP_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_PATCH_LIST:
            return GL_PATCHES;
#endif
        default:
            PLUGIN_ASSERT_MSG(false, "Unsupported primitive topology");
            break;
    }
    return GL_POINTS;
}

GLenum GetBlendOp(BlendOp func)
{
    switch (func) {
        case CORE_BLEND_OP_ADD:
            return GL_FUNC_ADD;
        case CORE_BLEND_OP_SUBTRACT:
            return GL_FUNC_SUBTRACT;
        case CORE_BLEND_OP_REVERSE_SUBTRACT:
            return GL_FUNC_REVERSE_SUBTRACT;
        case CORE_BLEND_OP_MIN:
            return GL_MIN;
        case CORE_BLEND_OP_MAX:
            return GL_MAX;
        default:
            break;
    }
    return GL_FUNC_ADD;
}

GLenum GetBlendFactor(BlendFactor factor)
{
    switch (factor) {
        case CORE_BLEND_FACTOR_ZERO:
            return GL_ZERO;
        case CORE_BLEND_FACTOR_ONE:
            return GL_ONE;
        case CORE_BLEND_FACTOR_SRC_COLOR:
            return GL_SRC_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
            return GL_ONE_MINUS_SRC_COLOR;
        case CORE_BLEND_FACTOR_DST_COLOR:
            return GL_DST_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
            return GL_ONE_MINUS_DST_COLOR;
        case CORE_BLEND_FACTOR_SRC_ALPHA:
            return GL_SRC_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
            return GL_ONE_MINUS_SRC_ALPHA;
        case CORE_BLEND_FACTOR_DST_ALPHA:
            return GL_DST_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
            return GL_ONE_MINUS_DST_ALPHA;
        case CORE_BLEND_FACTOR_CONSTANT_COLOR:
            return GL_CONSTANT_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
            return GL_ONE_MINUS_CONSTANT_COLOR;
        case CORE_BLEND_FACTOR_CONSTANT_ALPHA:
            return GL_CONSTANT_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
            return GL_ONE_MINUS_CONSTANT_ALPHA;
        case CORE_BLEND_FACTOR_SRC_ALPHA_SATURATE:
            return GL_SRC_ALPHA_SATURATE;
            // NOTE: check the GLES3.2...
            /* following requires EXT_blend_func_extended (dual source blending) */
        case CORE_BLEND_FACTOR_SRC1_COLOR:
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
        case CORE_BLEND_FACTOR_SRC1_ALPHA:
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
        default:
            break;
    }
    return GL_ONE;
}

GLenum GetCompareOp(CompareOp aOp)
{
    switch (aOp) {
        case CORE_COMPARE_OP_NEVER:
            return GL_NEVER;
        case CORE_COMPARE_OP_LESS:
            return GL_LESS;
        case CORE_COMPARE_OP_EQUAL:
            return GL_EQUAL;
        case CORE_COMPARE_OP_LESS_OR_EQUAL:
            return GL_LEQUAL;
        case CORE_COMPARE_OP_GREATER:
            return GL_GREATER;
        case CORE_COMPARE_OP_NOT_EQUAL:
            return GL_NOTEQUAL;
        case CORE_COMPARE_OP_GREATER_OR_EQUAL:
            return GL_GEQUAL;
        case CORE_COMPARE_OP_ALWAYS:
            return GL_ALWAYS;
        default:
            break;
    }
    return GL_ALWAYS;
}

GLenum GetStencilOp(StencilOp aOp)
{
    switch (aOp) {
        case CORE_STENCIL_OP_KEEP:
            return GL_KEEP;
        case CORE_STENCIL_OP_ZERO:
            return GL_ZERO;
        case CORE_STENCIL_OP_REPLACE:
            return GL_REPLACE;
        case CORE_STENCIL_OP_INCREMENT_AND_CLAMP:
            return GL_INCR;
        case CORE_STENCIL_OP_DECREMENT_AND_CLAMP:
            return GL_DECR;
        case CORE_STENCIL_OP_INVERT:
            return GL_INVERT;
        case CORE_STENCIL_OP_INCREMENT_AND_WRAP:
            return GL_INCR_WRAP;
        case CORE_STENCIL_OP_DECREMENT_AND_WRAP:
            return GL_DECR_WRAP;
        default:
            break;
    }
    return GL_KEEP;
}

void SetState(GLenum type, bool enabled)
{
    if (enabled) {
        glEnable(type);
    } else {
        glDisable(type);
    }
}

void SetCullMode(const GraphicsState::RasterizationState& rs)
{
    SetState(GL_CULL_FACE, (rs.cullModeFlags != CORE_CULL_MODE_NONE));

    switch (rs.cullModeFlags) {
        case CORE_CULL_MODE_FRONT_BIT:
            glCullFace(GL_FRONT);
            break;
        case CORE_CULL_MODE_BACK_BIT:
            glCullFace(GL_BACK);
            break;
        case CORE_CULL_MODE_FRONT_AND_BACK:
            glCullFace(GL_FRONT_AND_BACK);
            break;
        case CORE_CULL_MODE_NONE:
        default:
            break;
    }
}

void SetFrontFace(const GraphicsState::RasterizationState& rs)
{
    switch (rs.frontFace) {
        case CORE_FRONT_FACE_COUNTER_CLOCKWISE:
            glFrontFace(GL_CCW);
            break;
        case CORE_FRONT_FACE_CLOCKWISE:
            glFrontFace(GL_CW);
            break;
        default:
            break;
    }
}

#if RENDER_HAS_GL_BACKEND
void SetPolygonMode(const GraphicsState::RasterizationState& rs)
{
    GLenum mode;
    switch (rs.polygonMode) {
        default:
        case CORE_POLYGON_MODE_FILL:
            mode = GL_FILL;
            break;
        case CORE_POLYGON_MODE_LINE:
            mode = GL_LINE;
            break;
        case CORE_POLYGON_MODE_POINT:
            mode = GL_POINT;
            break;
    }
    glPolygonMode(GL_FRONT_AND_BACK, mode);
}
#endif

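// Invalidates the given attachments of a framebuffer: the whole attachments when the render area
// covers the full framebuffer, otherwise only the render-area sub-rectangle.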
void Invalidate(GLenum framebuffer, int32_t count, const GLenum invalidate[], const RenderPassDesc& rpd,
    const LowlevelFramebufferGL& frameBuffer)
{
    if (count > 0) {
        if ((frameBuffer.width == rpd.renderArea.extentWidth) && (frameBuffer.height == rpd.renderArea.extentHeight)) {
            // Invalidate the whole buffer. (attachment sizes match the render area)
            glInvalidateFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate);
        } else {
            // Invalidate only a part of the render target.
            // NOTE: verify that this works, we might need to flip the Y axis the same way as scissors etc.
            const auto X = static_cast<const GLint>(rpd.renderArea.offsetX);
            const auto Y = static_cast<const GLint>(rpd.renderArea.offsetY);
            const auto W = static_cast<const GLsizei>(rpd.renderArea.extentWidth);
            const auto H = static_cast<const GLsizei>(rpd.renderArea.extentHeight);
            glInvalidateSubFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate, X, Y, W, H);
        }
    }
}

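// Everything needed for one buffer-to-image copy: destination image platform data and description,
// the copy parameters, the source pointer (or buffer offset), and the per-layer/slice data size.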
struct BlitData {
    const GpuImagePlatformDataGL& iPlat;
    const GpuImageDesc& imageDesc;
    const BufferImageCopy& bufferImageCopy;
    uintptr_t data { 0 };
    uint64_t size { 0 };
    uint64_t sizeOfData { 0 };
    bool compressed { false };
};

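// Uploads the selected array layers one at a time with (Compressed)TexSubImage3D, advancing the
// source pointer by the per-layer size after each layer.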
void BlitArray(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
    // NOTE: image offset depth is ignored
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height),
        Math::min(imageSize.z, bufferImageCopy.imageExtent.depth) };
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    if (valid) {
        uintptr_t data = bd.data;
        const uint32_t layerCount = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
        for (uint32_t layer = imageSubresource.baseArrayLayer; layer < layerCount; layer++) {
            const Math::UVec3 offset3D { offset.x, offset.y, layer };
            if (bd.compressed) {
                device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
            } else {
                device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
            }
            data += static_cast<ptrdiff_t>(bd.sizeOfData);
        }
    }
}

void Blit2D(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec2 imageSize { imageDesc.width >> mip, imageDesc.height >> mip };
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec2 extent { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height) };
    PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == 1,
        "RenderCommandCopyBufferImage Texture2D with baseArrayLayer!=0 && layerCount!= 1");
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    const uintptr_t data = bd.data;
    if (valid && bd.compressed) {
        device_.CompressedTexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent,
            iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
    } else if (valid) {
        device_.TexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent, iPlat.format,
            iPlat.dataType, reinterpret_cast<const void*>(data));
    }
}

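// Uploads a 3D image slice by slice: the extent depth is clamped to a single slice and the z
// component of the offset selects the slice, while the source pointer advances by the slice size.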
void Blit3D(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth >> mip };
    const Math::UVec3 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height,
        bufferImageCopy.imageOffset.depth };
    Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height), Math::min(imageSize.z - offset.z, 1U) };
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    if (valid) {
        uintptr_t data = bd.data;
        for (uint32_t slice = 0U; slice < imageSize.z; ++slice) {
            const Math::UVec3 offset3D { offset.x, offset.y, slice };
            if (bd.compressed) {
                device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
            } else {
                device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
            }
            // offsets one slice
            data += static_cast<uintptr_t>(bd.sizeOfData);
        }
    }
}

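// Uploads cube map faces: the array layer index is translated to a GL face enum (stopping at the
// terminating zero in faceId) and each face is uploaded with (Compressed)TexSubImage2D.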
void BlitCube(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec2 extent { bufferImageCopy.imageExtent.width, bufferImageCopy.imageExtent.height };
    constexpr GLenum faceId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
        GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
        GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
    PLUGIN_UNUSED(Gles::CUBEMAP_LAYERS);
    PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == Gles::CUBEMAP_LAYERS,
        "RenderCommandCopyBufferImage Cubemap with baseArrayLayer!=0 && layerCount!= 6");
    uintptr_t data = bd.data;
    const uint32_t lastLayer = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
    for (uint32_t i = imageSubresource.baseArrayLayer; i < lastLayer; i++) {
        const GLenum face = faceId[i]; // convert layer index to cube map face id.
        if (face == 0) {
            // reached the end of cubemap faces (see faceId)
            // so must stop copying.
            break;
        }
        if (bd.compressed) {
            device_.CompressedTexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent,
                iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
        } else {
            device_.TexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent, iPlat.format,
                iPlat.dataType, reinterpret_cast<const void*>(data));
        }
        data += static_cast<uintptr_t>(bd.sizeOfData);
    }
}

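// Prepares a buffer-to-image copy: binds the pixel unpack buffer (or maps the source buffer), sets
// the unpack pixel store state and computes the per-layer data size. For compressed formats the
// size is rounded up to whole blocks; e.g. (illustrative numbers) a 10x6 region with 4x4 blocks of
// 8 bytes gives ceil(10/4) * ceil(6/4) * 8 = 3 * 2 * 8 = 48 bytes.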
template<bool usePixelUnpackBuffer>
BlitData SetupBlit(DeviceGLES& device_, const BufferImageCopy& bufferImageCopy, GpuBufferGLES& srcGpuBuffer,
    const GpuImageGLES& dstGpuImage)
{
    const auto& iPlat = dstGpuImage.GetPlatformData();
    const auto& imageOffset = bufferImageCopy.imageOffset;
    PLUGIN_UNUSED(imageOffset);
    const auto& imageExtent = bufferImageCopy.imageExtent;
    auto width = (!bufferImageCopy.bufferImageHeight || bufferImageCopy.bufferRowLength)
                     ? bufferImageCopy.imageExtent.width
                     : bufferImageCopy.bufferRowLength;
    auto height = (!bufferImageCopy.bufferImageHeight || bufferImageCopy.bufferRowLength)
                      ? bufferImageCopy.imageExtent.height
                      : bufferImageCopy.bufferImageHeight;
    // size is calculated for a single layer / slice
    const uint64_t size =
        static_cast<uint64_t>(iPlat.bytesperpixel) * static_cast<uint64_t>(width) * static_cast<uint64_t>(height);
    uintptr_t data = bufferImageCopy.bufferOffset;
    if constexpr (usePixelUnpackBuffer) {
        const auto& plat = srcGpuBuffer.GetPlatformData();
        device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, plat.buffer);
    } else {
        // Use the mapped pointer for glTexSubImage2D; this is a workaround for GL_INVALID_OPERATION on the
        // PVR GLES simulator and a crash with ETC2 textures on NVIDIA.
        data += reinterpret_cast<uintptr_t>(srcGpuBuffer.Map());
    }
    uint64_t sizeOfData = size;
    const auto& compinfo = iPlat.compression;
    if (compinfo.compressed) {
        // how many blocks in width
        const int64_t blockW = (imageExtent.width + (compinfo.blockW - 1)) / compinfo.blockW;
        // how many blocks in height
        const int64_t blockH = (imageExtent.height + (compinfo.blockH - 1)) / compinfo.blockH;
        // size in bytes
        sizeOfData = static_cast<uint64_t>(((blockW * blockH) * compinfo.bytesperblock));

        // Warn for partial copies; we do not handle those at the moment.
        if (bufferImageCopy.bufferRowLength != 0) {
            if (bufferImageCopy.bufferRowLength != blockW * compinfo.blockW) {
                PLUGIN_LOG_W("Partial copies of compressed texture data are not currently supported. "
                             "Stride must match image width (with block align). "
                             "bufferImageCopy.bufferRowLength(%d) "
                             "imageExtent.width(%d) ",
                    bufferImageCopy.bufferRowLength, imageExtent.width);
            }
        }
        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
        glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
    } else {
        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bufferImageCopy.bufferRowLength));
        glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, static_cast<GLint>(bufferImageCopy.bufferImageHeight));
    }
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1); // Make sure the alignment is tight.
    return { iPlat, dstGpuImage.GetDesc(), bufferImageCopy, data, size, sizeOfData, compinfo.compressed };
}

template<bool usePixelUnpackBuffer>
void FinishBlit(DeviceGLES& device_, const GpuBufferGLES& srcGpuBuffer)
{
    if constexpr (usePixelUnpackBuffer) {
        device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    } else {
        srcGpuBuffer.Unmap();
    }
}

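// Element-wise comparison and assignment helpers for fixed-size C arrays.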
template<typename T, size_t N>
constexpr bool Compare(const T (&a)[N], const T (&b)[N])
{
    for (size_t i = 0; i < N; i++) {
        if (a[i] != b[i])
            return false;
    }
    return true;
}

template<typename T, size_t N>
constexpr bool Set(T (&a)[N], const T (&b)[N])
{
    for (size_t i = 0; i < N; i++) {
        a[i] = b[i];
    }
    return true;
}

bool CompareBlendFactors(
    const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    return (a.srcColorBlendFactor == b.srcColorBlendFactor) && (a.srcAlphaBlendFactor == b.srcAlphaBlendFactor) &&
           (a.dstColorBlendFactor == b.dstColorBlendFactor) && (a.dstAlphaBlendFactor == b.dstAlphaBlendFactor);
}

void SetBlendFactors(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    a.srcColorBlendFactor = b.srcColorBlendFactor;
    a.srcAlphaBlendFactor = b.srcAlphaBlendFactor;
    a.dstColorBlendFactor = b.dstColorBlendFactor;
    a.dstAlphaBlendFactor = b.dstAlphaBlendFactor;
}

bool CompareBlendOps(
    const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    return (a.colorBlendOp == b.colorBlendOp) && (a.alphaBlendOp == b.alphaBlendOp);
}

void SetBlendOps(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    a.colorBlendOp = b.colorBlendOp;
    a.alphaBlendOp = b.alphaBlendOp;
}

bool CompareStencilOp(const GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    return (a.failOp == b.failOp) && (a.depthFailOp == b.depthFailOp) && (a.passOp == b.passOp);
}

void SetStencilOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    a.failOp = b.failOp;
    a.depthFailOp = b.depthFailOp;
    a.passOp = b.passOp;
}

void SetStencilCompareOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    a.compareOp = b.compareOp;
    a.compareMask = b.compareMask;
    a.reference = b.reference;
}

#if RENDER_VALIDATION_ENABLED
void ValidateCopyImage(const Offset3D& offset, const Size3D& extent, uint32_t mipLevel, const GpuImageDesc& imageDesc)
{
    if (mipLevel >= imageDesc.mipCount) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage mipLevel must be less than image mipCount.");
    }
    if ((offset.x < 0) || (offset.y < 0) || (offset.z < 0)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset must not be negative.");
    }
    if (((offset.x + extent.width) > imageDesc.width) || ((offset.y + extent.height) > imageDesc.height) ||
        ((offset.z + extent.depth) > imageDesc.depth)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset + extent does not fit in image.");
    }
}

void ValidateCopyImage(const ImageCopy& imageCopy, const GpuImageDesc& srcImageDesc, const GpuImageDesc& dstImageDesc)
{
    ValidateCopyImage(imageCopy.srcOffset, imageCopy.extent, imageCopy.srcSubresource.mipLevel, srcImageDesc);
    ValidateCopyImage(imageCopy.dstOffset, imageCopy.extent, imageCopy.dstSubresource.mipLevel, dstImageDesc);
}
#endif

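// Clamps a negative source offset to zero, shifting the destination offset and shrinking the copy
// size accordingly. Example (illustrative values): srcOffset = -2, dstOffset = 3, size = 10 becomes
// srcOffset = 0, dstOffset = 5, size = 8.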
constexpr void ClampOffset(int32_t& srcOffset, int32_t& dstOffset, uint32_t& size)
{
    if (srcOffset < 0) {
        auto iSize = static_cast<int32_t>(size);
        size = static_cast<uint32_t>(iSize + srcOffset);
        dstOffset -= srcOffset;
        srcOffset = 0;
    }
}

constexpr void ClampOffset(Offset3D& srcOffset, Offset3D& dstOffset, Size3D& size)
{
    ClampOffset(srcOffset.x, dstOffset.x, size.width);
    ClampOffset(srcOffset.y, dstOffset.y, size.height);
    ClampOffset(srcOffset.z, dstOffset.z, size.depth);
}

constexpr void ClampSize(int32_t offset, uint32_t maxSize, uint32_t& size)
{
    if (size > static_cast<uint32_t>(static_cast<int32_t>(maxSize) - offset)) {
        size = static_cast<uint32_t>(static_cast<int32_t>(maxSize) - offset);
    }
}

constexpr void ClampSize(const Offset3D& offset, const GpuImageDesc& desc, Size3D& size)
{
    ClampSize(offset.x, desc.width, size.width);
    ClampSize(offset.y, desc.height, size.height);
    ClampSize(offset.z, desc.depth, size.depth);
}

// helper which covers barriers supported by Barrier and BarrierByRegion
constexpr GLbitfield CommonBarrierBits(AccessFlags accessFlags, RenderHandleType resourceType)
{
    GLbitfield barriers = 0;
    if (accessFlags & CORE_ACCESS_UNIFORM_READ_BIT) {
        barriers |= GL_UNIFORM_BARRIER_BIT;
    }
    if (accessFlags & CORE_ACCESS_SHADER_READ_BIT) {
        // shader read covers UBO, SSBO, storage image etc. use resource type to limit the options.
        if (resourceType == RenderHandleType::GPU_IMAGE) {
            barriers |= GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        } else if (resourceType == RenderHandleType::GPU_BUFFER) {
            barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
        } else {
            barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
                        GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        }
    }
    if (accessFlags & CORE_ACCESS_SHADER_WRITE_BIT) {
        if (resourceType == RenderHandleType::GPU_IMAGE) {
            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        } else if (resourceType == RenderHandleType::GPU_BUFFER) {
            barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
        } else {
            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
        }
    }
    if (accessFlags & (CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                          CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
        barriers |= GL_FRAMEBUFFER_BARRIER_BIT;
    }
    // GL_ATOMIC_COUNTER_BARRIER_BIT is not used at the moment
    return barriers;
}
} // namespace

RenderBackendGLES::RenderBackendGLES(Device& device, GpuResourceManager& gpuResourceManager)
    : RenderBackend(), device_(static_cast<DeviceGLES&>(device)), gpuResourceMgr_(gpuResourceManager)
{
#if (RENDER_PERF_ENABLED == 1)
    validGpuQueries_ = false;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();
#if RENDER_HAS_GL_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGL) {
        validGpuQueries_ = true;
    }
#endif
#if RENDER_HAS_GLES_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
        // Check if GL_EXT_disjoint_timer_query is available.
        validGpuQueries_ = device_.HasExtension("GL_EXT_disjoint_timer_query");
    }
#endif
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
#endif // RENDER_PERF_ENABLED
#if RENDER_HAS_GLES_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
        multisampledRenderToTexture_ = device_.HasExtension("GL_EXT_multisampled_render_to_texture2");
    }
#endif
    PLUGIN_ASSERT(device_.IsActive());
    PrimeCache(GraphicsState {}); // Initializes cache.
    glGenFramebuffers(1, &blitImageSourceFbo_);
    glGenFramebuffers(1, &blitImageDestinationFbo_);
#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_D("fbo id >: %u", blitImageSourceFbo_);
    PLUGIN_LOG_D("fbo id >: %u", blitImageDestinationFbo_);
#endif
#if !RENDER_HAS_GLES_BACKEND
    glEnable(GL_PROGRAM_POINT_SIZE);
#endif
}

RenderBackendGLES::~RenderBackendGLES()
{
    PLUGIN_ASSERT(device_.IsActive());
    device_.DeleteFrameBuffer(blitImageSourceFbo_);
    device_.DeleteFrameBuffer(blitImageDestinationFbo_);
}

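// Presents every swapchain in the back buffer configuration. With RENDER_GL_FLIP_Y_SWAPCHAIN the
// frame is first blitted with a y-flip from the swapchain FBO to the default framebuffer (FBO 0).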
void RenderBackendGLES::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!backBufferConfig.swapchainData.empty()) {
        if (device_.HasSwapchain()) {
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.Begin();
#endif
            for (const auto& swapchainData : backBufferConfig.swapchainData) {
#if (RENDER_DEV_ENABLED == 1)
                if (swapchainData.config.gpuSemaphoreHandle) {
                    // NOTE: not implemented
                    PLUGIN_LOG_E("NodeGraphBackBufferConfiguration semaphore not signaled");
                }
#endif
                const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapchainData.handle));
                if (swp) {
#if RENDER_GL_FLIP_Y_SWAPCHAIN
                    // Blit and flip our swapchain frame to the backbuffer.
                    const auto& sdesc = swp->GetDesc();
                    if (scissorEnabled_) {
                        glDisable(GL_SCISSOR_TEST);
                        scissorEnabled_ = false;
                    }
                    const auto& platSwapchain = swp->GetPlatformData();
                    device_.BindReadFrameBuffer(platSwapchain.fbos[presentationInfo_.swapchainImageIndex]);
                    device_.BindWriteFrameBuffer(0); // FBO 0 is the surface bound to the current context.
                    glBlitFramebuffer(0, 0, (GLint)sdesc.width, (GLint)sdesc.height, 0, (GLint)sdesc.height,
                        (GLint)sdesc.width, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST);
                    device_.BindReadFrameBuffer(0);
#endif
                    device_.SwapBuffers(*swp);
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.End();
#endif
        }
    }
}

void RenderBackendGLES::ResetState()
{
    boundProgram_ = {};
    boundIndexBuffer_ = {};
    vertexAttribBinds_ = 0;
    renderingToDefaultFbo_ = false;
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = nullptr;
    currentPsoHandle_ = {};
    renderArea_ = {};
    activeRenderPass_ = {};
    currentSubPass_ = 0;
    currentFrameBuffer_ = nullptr;
    inRenderpass_ = 0;
    descriptorUpdate_ = false;
    vertexBufferUpdate_ = false;
    indexBufferUpdate_ = false;
}

void RenderBackendGLES::ResetBindings()
{
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = nullptr;
    currentPsoHandle_ = {};
}

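// Frame entry point: acquires the next image of each swapchain and remaps its handle, updates the
// global descriptor sets, executes every recorded command list, and finishes with end-of-frame
// fencing and external GPU signal handling.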
void RenderBackendGLES::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: all command lists are validated before entering here
    PLUGIN_ASSERT(device_.IsActive());
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif
    presentationInfo_ = {};

    if (device_.HasSwapchain() && (!backBufferConfig.swapchainData.empty())) {
        for (const auto& swapData : backBufferConfig.swapchainData) {
            if (const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapData.handle))) {
                presentationInfo_.swapchainImageIndex = swp->GetNextImage();
                const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
                if (presentationInfo_.swapchainImageIndex < swapchainData.imageViewCount) {
                    // remap image to backbuffer
                    const RenderHandle currentSwapchainHandle =
                        swapchainData.imageViews[presentationInfo_.swapchainImageIndex];
                    // special swapchain remapping
                    gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(swapData.handle, currentSwapchainHandle);
                }
            }
        }
    }
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif
    // global begin backend frame
    auto& descriptorSetMgr = (DescriptorSetManagerGles&)device_.GetDescriptorSetManager();
    descriptorSetMgr.BeginBackendFrame();

    // Reset bindings.
    ResetState();

    // Update the global descriptor sets if needed.
    UpdateGlobalDescriptorSets();

    for (const auto& ref : renderCommandFrameData.renderCommandContexts) {
        // Reset bindings between command lists.
        ResetBindings();
        RenderSingleCommandList(ref);
    }
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
#endif
    RenderProcessEndCommandLists(renderCommandFrameData, backBufferConfig);
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

void RenderBackendGLES::RenderProcessEndCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (auto* frameSync = static_cast<RenderFrameSyncGLES*>(renderCommandFrameData.renderFrameSync); frameSync) {
        frameSync->GetFrameFence();
    }
    // signal external GPU fences
    if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
        auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
        const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
        PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
        if (externalSignals.size() == externalSemaphores.size()) {
            for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
                // needs to be false
                if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
                    if (const auto* gs = (const GpuSemaphoreGles*)externalSemaphores[sigIdx].get(); gs) {
                        auto& plat = const_cast<GpuSemaphorePlatformDataGles&>(gs->GetPlatformData());
                        // NOTE: currently could create only one GPU sync
                        GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
                        plat.sync = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sync));
                        externalSignals[sigIdx].gpuSignalResourceHandle = plat.sync;
                        externalSignals[sigIdx].signaled = true;

                        // NOTE: client is expected to add code for the wait with glClientWaitSync(sync, X, 0)
                    }
                }
            }
        }
    }
}

void RenderBackendGLES::RenderCommandUndefined(const RenderCommandWithType& renderCommand)
{
    PLUGIN_ASSERT_MSG(false, "non-valid render command");
}

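// Executes one recorded command list: begins the per-node pool/PSO backend frames, updates the
// list's descriptor sets, wraps execution in debug markers and optional CPU/GPU timers, and
// dispatches each command through the COMMAND_HANDLERS table.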
void RenderBackendGLES::RenderSingleCommandList(const RenderCommandContext& renderCommandCtx)
{
    // these are validated in render graph
    managers_ = { renderCommandCtx.nodeContextPsoMgr, renderCommandCtx.nodeContextPoolMgr,
        renderCommandCtx.nodeContextDescriptorSetMgr, renderCommandCtx.renderBarrierList };

    managers_.poolMgr->BeginBackendFrame();
    managers_.psoMgr->BeginBackendFrame();

    // update cmd list context descriptor sets
    UpdateCommandListDescriptorSets(*renderCommandCtx.renderCommandList, *renderCommandCtx.nodeContextDescriptorSetMgr);

#if (RENDER_PERF_ENABLED == 1) || (RENDER_DEBUG_MARKERS_ENABLED == 1)
    const auto& debugName = renderCommandCtx.debugName;
#endif
#if (RENDER_PERF_ENABLED == 1)
    perfCounters_ = {};
    PLUGIN_ASSERT(timers_.count(debugName) == 1);
    PerfDataSet& perfDataSet = timers_[debugName];
    perfDataSet.cpuTimer.Begin();
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (validGpuQueries_) {
#ifdef GL_GPU_DISJOINT_EXT
        /* Clear disjoint error */
        GLint disjointOccurred = 0;
        glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
#endif
        GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
        PLUGIN_ASSERT(gpuQuery);

        const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
        PLUGIN_ASSERT(platData.queryObject);
        glBeginQuery(GL_TIME_ELAPSED_EXT, platData.queryObject);
    }
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
#endif // RENDER_PERF_ENABLED
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)debugName.data());
#endif
    commandListValid_ = true;
    for (const auto& ref : renderCommandCtx.renderCommandList->GetRenderCommands()) {
        PLUGIN_ASSERT(ref.rc);
        if (commandListValid_) {
#if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
            glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)COMMAND_NAMES[(uint32_t)ref.type]);
#endif
            (this->*(COMMAND_HANDLERS[static_cast<uint32_t>(ref.type)]))(ref);
#if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
            glPopDebugGroup();
#endif
        }
    }
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPopDebugGroup();
#endif
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (validGpuQueries_) {
        glEndQuery(GL_TIME_ELAPSED_EXT);
    }
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
    perfDataSet.cpuTimer.End();
    CopyPerfTimeStamp(debugName, perfDataSet);
#endif // RENDER_PERF_ENABLED
}

void RenderBackendGLES::RenderCommandBindPipeline(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_PIPELINE);
    const auto& renderCmd = *static_cast<const struct RenderCommandBindPipeline*>(ref.rc);
    boundProgram_ = {};
    if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE) {
        PLUGIN_ASSERT(currentFrameBuffer_ == nullptr);
        BindComputePipeline(renderCmd);
    } else if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        BindGraphicsPipeline(renderCmd);
    }
    descriptorUpdate_ = true;
    currentPsoHandle_ = renderCmd.psoHandle;
}

void RenderBackendGLES::BindComputePipeline(const struct RenderCommandBindPipeline& renderCmd)
{
    const auto* pso = static_cast<const ComputePipelineStateObjectGLES*>(
        managers_.psoMgr->GetComputePso(renderCmd.psoHandle, nullptr));
    boundComputePipeline_ = pso;
    boundGraphicsPipeline_ = nullptr;
    boundComputeProgram_ = nullptr;
    boundShaderProgram_ = nullptr;
    if (!boundComputePipeline_) {
        return;
    }

    // Push constants and the "fliplocation" uniform (i.e. uniform state) should only be updated when changed.
    const auto& pipelineData =
        static_cast<const PipelineStateObjectPlatformDataGL&>(boundComputePipeline_->GetPlatformData());
    if (!pipelineData.computeShader) {
        return;
    }
    boundComputeProgram_ = pipelineData.computeShader;
    const auto& sd = static_cast<const GpuComputeProgramPlatformDataGL&>(pipelineData.computeShader->GetPlatformData());
    const uint32_t program = sd.program;
#if (RENDER_PERF_ENABLED == 1)
    if (device_.BoundProgram() != program) {
        ++perfCounters_.bindProgram;
    }
#endif
    device_.UseProgram(program);

    if (sd.flipLocation != Gles::INVALID_LOCATION) {
        const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
        glProgramUniform1fv(program, sd.flipLocation, 1, &flip);
    }
}

void RenderBackendGLES::BindGraphicsPipeline(const struct RenderCommandBindPipeline& renderCmd)
{
    const auto* pso = static_cast<const GraphicsPipelineStateObjectGLES*>(
        managers_.psoMgr->GetGraphicsPso(renderCmd.psoHandle, activeRenderPass_.renderPassDesc,
            activeRenderPass_.subpasses, activeRenderPass_.subpassStartIndex, 0, nullptr, nullptr));
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = pso;
    boundComputeProgram_ = nullptr;
    boundShaderProgram_ = nullptr;
    if (!boundGraphicsPipeline_ || !currentFrameBuffer_) {
        return;
    }

    const auto& pipelineData = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
    dynamicStateFlags_ = pipelineData.dynamicStateFlags;
    DoGraphicsState(pipelineData.graphicsState);
    // NOTE: Deprecate (default viewport/scissor should be set from default targets at some point)
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT)) {
        SetViewport(ViewportDesc { 0.0f, 0.0f, static_cast<float>(renderArea_.extentWidth),
            static_cast<float>(renderArea_.extentHeight), 0.0f, 1.0f });
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_SCISSOR)) {
        SetScissor(ScissorDesc { 0, 0, renderArea_.extentWidth, renderArea_.extentHeight });
    }
    const GpuShaderProgramGLES* shader = pipelineData.graphicsShader;
    if (!shader) {
        return;
    }
    boundShaderProgram_ = shader;
    const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
    // Push constants and the "fliplocation" uniform (i.e. uniform state) should only be updated when changed.
    if (!scissorEnabled_) {
        scissorEnabled_ = true;
        glEnable(GL_SCISSOR_TEST); // Always enabled
    }
    uint32_t program = sd.program;
#if (RENDER_PERF_ENABLED == 1)
    if (device_.BoundProgram() != program) {
        ++perfCounters_.bindProgram;
    }
#endif
    device_.UseProgram(program);
    device_.BindVertexArray(pipelineData.vao);
    vertexBufferUpdate_ = true;
    indexBufferUpdate_ = true;

    if (sd.flipLocation != Gles::INVALID_LOCATION) {
        const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
        glProgramUniform1fv(program, sd.flipLocation, 1, &flip);
    }
}

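// Issues a draw, selecting between glDrawArrays*, glDrawElements* and their instanced / base-vertex
// variants based on index count, instance count and vertex offset; firstInstance is not supported
// (see the note below).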
void RenderBackendGLES::RenderCommandDraw(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW);
    const auto& renderCmd = *static_cast<struct RenderCommandDraw*>(ref.rc);
    if (!boundGraphicsPipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
    BindResources();
    if (vertexBufferUpdate_ && boundShaderProgram_) {
        vertexBufferUpdate_ = false;
        const auto& pipelineData =
            static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
        const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(boundShaderProgram_->GetPlatformData());
        BindVertexInputs(pipelineData.vertexInputDeclaration, array_view<const int32_t>(sd.inputs, countof(sd.inputs)));
    }
    if (indexBufferUpdate_) {
        indexBufferUpdate_ = false;
        device_.BindElementBuffer(boundIndexBuffer_.id);
    }
    const auto type = GetPrimFromTopology(topology_);
    const auto instanceCount = static_cast<GLsizei>(renderCmd.instanceCount);
    // firstInstance is not supported yet; the SPIRV-Cross generated uniform
    // "SPIRV_Cross_BaseInstance" would need to be set to renderCmd.firstInstance.
    if (renderCmd.indexCount) {
        uintptr_t offsetp = boundIndexBuffer_.offset;
        GLenum indexType = GL_UNSIGNED_SHORT;
        switch (boundIndexBuffer_.type) {
            case CORE_INDEX_TYPE_UINT16:
                offsetp += renderCmd.firstIndex * sizeof(uint16_t);
                indexType = GL_UNSIGNED_SHORT;
                break;
            case CORE_INDEX_TYPE_UINT32:
                offsetp += renderCmd.firstIndex * sizeof(uint32_t);
                indexType = GL_UNSIGNED_INT;
                break;
            default:
                PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
                break;
        }
        const auto indexCount = static_cast<const GLsizei>(renderCmd.indexCount);
        const auto vertexOffset = static_cast<const GLsizei>(renderCmd.vertexOffset);
        const void* offset = reinterpret_cast<const void*>(offsetp);
        if (renderCmd.instanceCount > 1) {
            if (vertexOffset) {
                glDrawElementsInstancedBaseVertex(type, indexCount, indexType, offset, instanceCount, vertexOffset);
            } else {
                glDrawElementsInstanced(type, indexCount, indexType, offset, instanceCount);
            }
        } else {
            if (vertexOffset) {
                glDrawElementsBaseVertex(type, indexCount, indexType, offset, vertexOffset);
            } else {
                glDrawElements(type, indexCount, indexType, offset);
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.drawCount;
        perfCounters_.instanceCount += renderCmd.instanceCount;
        perfCounters_.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
#endif
    } else {
        const auto firstVertex = static_cast<const GLsizei>(renderCmd.firstVertex);
        const auto vertexCount = static_cast<const GLsizei>(renderCmd.vertexCount);
        if (renderCmd.instanceCount > 1) {
            glDrawArraysInstanced(type, firstVertex, vertexCount, instanceCount);
        } else {
            glDrawArrays(type, firstVertex, vertexCount);
        }
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.drawCount;
        perfCounters_.instanceCount += renderCmd.instanceCount;
        perfCounters_.triangleCount += (renderCmd.vertexCount * 3) * renderCmd.instanceCount; // 3: vertex dimension
#endif
    }
}

RenderCommandDrawIndirect(const RenderCommandWithType & ref)1157 void RenderBackendGLES::RenderCommandDrawIndirect(const RenderCommandWithType& ref)
1158 {
1159     PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW_INDIRECT);
1160     const auto& renderCmd = *static_cast<const struct RenderCommandDrawIndirect*>(ref.rc);
1161     if (!boundGraphicsPipeline_) {
1162         return;
1163     }
1164     PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
1165     if (vertexBufferUpdate_ && boundShaderProgram_) {
1166         vertexBufferUpdate_ = false;
1167         const auto& pipelineData =
1168             static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
1169         const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(boundShaderProgram_->GetPlatformData());
1170         BindVertexInputs(pipelineData.vertexInputDeclaration, array_view<const int32_t>(sd.inputs, countof(sd.inputs)));
1171     }
1172     if (indexBufferUpdate_) {
1173         indexBufferUpdate_ = false;
1174         device_.BindElementBuffer(boundIndexBuffer_.id);
1175     }
1176     BindResources();
1177     if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1178         const auto& plat = gpuBuffer->GetPlatformData();
1179         device_.BindBuffer(GL_DRAW_INDIRECT_BUFFER, plat.buffer);
1180         const auto type = GetPrimFromTopology(topology_);
1181         auto offset = static_cast<GLintptr>(renderCmd.offset);
1182         if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
1183             GLenum indexType = GL_UNSIGNED_SHORT;
1184             switch (boundIndexBuffer_.type) {
1185                 case CORE_INDEX_TYPE_UINT16:
1186                     indexType = GL_UNSIGNED_SHORT;
1187                     break;
1188                 case CORE_INDEX_TYPE_UINT32:
1189                     indexType = GL_UNSIGNED_INT;
1190                     break;
1191                 default:
1192                     PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
1193                     break;
1194             }
1195             for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1196                 glDrawElementsIndirect(type, indexType, reinterpret_cast<const void*>(offset));
1197                 offset += renderCmd.stride;
1198             }
1199         } else {
1200             for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
1201                 glDrawArraysIndirect(type, reinterpret_cast<const void*>(offset));
1202                 offset += renderCmd.stride;
1203             }
1204         }
1205 #if (RENDER_PERF_ENABLED == 1)
1206         perfCounters_.drawIndirectCount += renderCmd.drawCount;
1207 #endif
1208     }
1209 }
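// For reference (sketch, not engine code): the indirect buffer bound above is expected to
// hold tightly packed GL-style argument structs, one per draw, renderCmd.stride bytes apart.
// Per the GL/GLES 3.1 specs they are laid out as:
//
//   struct DrawElementsIndirectCommand {   // consumed by glDrawElementsIndirect
//       GLuint count;                      // index count
//       GLuint instanceCount;
//       GLuint firstIndex;
//       GLint baseVertex;
//       GLuint reservedMustBeZero;         // baseInstance in desktop GL 4.x
//   };
//   struct DrawArraysIndirectCommand {     // consumed by glDrawArraysIndirect
//       GLuint count;                      // vertex count
//       GLuint instanceCount;
//       GLuint first;
//       GLuint reservedMustBeZero;         // baseInstance in desktop GL 4.x
//   };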
1210 
1211 void RenderBackendGLES::RenderCommandDispatch(const RenderCommandWithType& ref)
1212 {
1213     PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH);
1214     const auto& renderCmd = *static_cast<const struct RenderCommandDispatch*>(ref.rc);
1215     if (!boundComputePipeline_) {
1216         return;
1217     }
1218     PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1219     BindResources();
1220     glDispatchCompute(renderCmd.groupCountX, renderCmd.groupCountY, renderCmd.groupCountZ);
1221 #if (RENDER_PERF_ENABLED == 1)
1222     ++perfCounters_.dispatchCount;
1223 #endif
1224 }
1225 
1226 void RenderBackendGLES::RenderCommandDispatchIndirect(const RenderCommandWithType& ref)
1227 {
1228     PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH_INDIRECT);
1229     const auto& renderCmd = *static_cast<const struct RenderCommandDispatchIndirect*>(ref.rc);
1230     if (!boundComputePipeline_) {
1231         return;
1232     }
1233     PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
1234     BindResources();
1235     if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
1236         const auto& plat = gpuBuffer->GetPlatformData();
1237         device_.BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, plat.buffer);
1238         glDispatchComputeIndirect(static_cast<GLintptr>(renderCmd.offset));
1239 #if (RENDER_PERF_ENABLED == 1)
1240         ++perfCounters_.dispatchIndirectCount;
1241 #endif
1242     }
1243 }
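// For reference (sketch, not engine code): glDispatchComputeIndirect reads three
// consecutive GLuints at the given offset in the buffer bound to GL_DISPATCH_INDIRECT_BUFFER:
//
//   struct DispatchIndirectCommand {
//       GLuint numGroupsX;
//       GLuint numGroupsY;
//       GLuint numGroupsZ;
//   };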
1244 
1245 void RenderBackendGLES::ClearScissorInit(const RenderPassDesc::RenderArea& aArea)
1246 {
1247     resetScissor_ = false;   // need to reset scissor state after clear?
1248     clearScissorSet_ = true; // need to setup clear scissors before clear?
1249     clearScissor_ = aArea;   // area to be cleared
1250     if (scissorPrimed_) {    // have scissors been set yet?
1251         if ((clearScissor_.offsetX == scissorBox_.offsetX) && (clearScissor_.offsetY == scissorBox_.offsetY) &&
1252             (clearScissor_.extentWidth == scissorBox_.extentWidth) &&
1253             (clearScissor_.extentHeight == scissorBox_.extentHeight)) {
1254             // Current scissors match clearscissor area, so no need to set it again.
1255             clearScissorSet_ = false;
1256         }
1257     }
1258 }
1259 
1260 void RenderBackendGLES::ClearScissorSet()
1261 {
1262     if (clearScissorSet_) {       // do we need to set clear scissors.
1263         clearScissorSet_ = false; // clear scissors have been set now.
1264         resetScissor_ = true;     // we are modifying scissors, so remember to reset them afterwards.
1265         glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
1266             static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
1267     }
1268 }
1269 
1270 void RenderBackendGLES::ClearScissorReset()
1271 {
1272     if (resetScissor_) { // need to reset correct scissors?
1273         if (!scissorPrimed_) {
1274             // scissors have not been set yet, so use clearbox as current cache state (and don't change scissor
1275             // setting)
1276             scissorPrimed_ = true;
1277             glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
1278                 static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
1279             scissorBox_.offsetX = clearScissor_.offsetX;
1280             scissorBox_.offsetY = clearScissor_.offsetY;
1281             scissorBox_.extentHeight = clearScissor_.extentHeight;
1282             scissorBox_.extentWidth = clearScissor_.extentWidth;
1283         } else {
1284             // Restore scissor box to cached state. (update scissors when needed, since clearBox != scissorBox)
1285             glScissor(static_cast<GLint>(scissorBox_.offsetX), static_cast<GLint>(scissorBox_.offsetY),
1286                 static_cast<GLsizei>(scissorBox_.extentWidth), static_cast<GLsizei>(scissorBox_.extentHeight));
1287         }
1288     }
1289 }
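// Usage summary for the three scissor helpers above (sketch mirroring DoSubPass below):
//
//   ClearScissorInit(renderArea_);  // decide whether a clear-specific scissor box is needed
//   ClearScissorSet();              // before each glClearBuffer* that clears a sub-area
//   // ... glClearBufferfv / glClearBufferfi / glClearBufferiv ...
//   ClearScissorReset();            // prime or restore the cached scissor box afterwards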
1290 
1291 void RenderBackendGLES::HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc*> colorAttachments)
1292 {
1293     constexpr ColorComponentFlags clearAll = CORE_COLOR_COMPONENT_R_BIT | CORE_COLOR_COMPONENT_G_BIT |
1294                                              CORE_COLOR_COMPONENT_B_BIT | CORE_COLOR_COMPONENT_A_BIT;
1295     const auto& cBlend = cacheState_.colorBlendState;
1296     for (uint32_t idx = 0; idx < colorAttachments.size(); ++idx) {
1297         if (colorAttachments[idx] == nullptr) {
1298             continue;
1299         }
1300         const auto& ref = *(colorAttachments[idx]);
1301         if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1302             const auto& cBlendState = cBlend.colorAttachments[idx];
1303             if (clearAll != cBlendState.colorWriteMask) {
1304                 glColorMaski(idx, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1305             }
1306             ClearScissorSet();
1307             // glClearBufferfv only for float formats?
1308             // glClearBufferiv & glClearBufferuiv only for integer formats?
1309             glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
1310             if (clearAll != cBlendState.colorWriteMask) {
1311                 // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1312                 glColorMaski(idx, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
1313                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
1314                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
1315                     IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
1316             }
1317         }
1318     }
1319 }
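// Regarding the question above: per the GL(ES) spec, glClearBufferfv is for float and
// normalized color formats, glClearBufferiv for signed integer formats and
// glClearBufferuiv for unsigned integer formats. A format-aware clear could look like
// this sketch (IsSignedInt/IsUnsignedInt are hypothetical helpers, and the int32/uint32
// members assume a Vulkan-style ClearColorValue union):
//
//   if (IsSignedInt(attachmentFormat)) {
//       glClearBufferiv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.int32);
//   } else if (IsUnsignedInt(attachmentFormat)) {
//       glClearBufferuiv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.uint32);
//   } else {
//       glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
//   }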
1320 
1321 void RenderBackendGLES::HandleDepthAttachment(const RenderPassDesc::AttachmentDesc& depthAttachment)
1322 {
1323     const GLuint allBits = 0xFFFFFFFFu;
1324     const auto& ref = depthAttachment;
1325     const bool clearDepth = (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1326     const bool clearStencil = (ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1327     // Change state if needed.
1328     if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1329         glDepthMask(GL_TRUE);
1330     }
1331     if (clearStencil) {
1332         if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1333             glStencilMaskSeparate(GL_FRONT, allBits);
1334         }
1335         if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1336             glStencilMaskSeparate(GL_BACK, allBits);
1337         }
1338     }
1339     if (clearDepth || clearStencil) {
1340         // Set the scissors for clear..
1341         ClearScissorSet();
1342     }
1343     // Do clears.
1344     if (clearDepth && clearStencil) {
1345         glClearBufferfi(GL_DEPTH_STENCIL, 0, ref.clearValue.depthStencil.depth,
1346             static_cast<GLint>(ref.clearValue.depthStencil.stencil));
1347     } else if (clearDepth) {
1348         glClearBufferfv(GL_DEPTH, 0, &ref.clearValue.depthStencil.depth);
1349     } else if (clearStencil) {
1350         glClearBufferiv(GL_STENCIL, 0, reinterpret_cast<const GLint*>(&ref.clearValue.depthStencil.stencil));
1351     }
1352 
1353     // Restore cached state, if we touched the state.
1354     if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1355         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1356         glDepthMask(GL_FALSE);
1357     }
1358     if (clearStencil) {
1359         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1360         if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1361             glStencilMaskSeparate(GL_FRONT, cacheState_.depthStencilState.frontStencilOpState.writeMask);
1362         }
1363         if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1364             glStencilMaskSeparate(GL_BACK, cacheState_.depthStencilState.backStencilOpState.writeMask);
1365         }
1366     }
1367 }
1368 
1369 void RenderBackendGLES::DoSubPass(uint32_t subPass)
1370 {
1371     if (currentFrameBuffer_ == nullptr) {
1372         // Completely invalid state in backend.
1373         return;
1374     }
1375     const auto& rpd = activeRenderPass_.renderPassDesc;
1376     const auto& sb = activeRenderPass_.subpasses[subPass];
1377 
1378     // If there's no FBO, activate with the swapchain handle so that drawing happens to the correct surface.
1379     if (!currentFrameBuffer_->fbos[subPass].fbo && (sb.colorAttachmentCount == 1U)) {
1380         auto color = rpd.attachmentHandles[sb.colorAttachmentIndices[0]];
1381         device_.Activate(color);
1382     }
1383     device_.BindFrameBuffer(currentFrameBuffer_->fbos[subPass].fbo);
1384     ClearScissorInit(renderArea_);
1385     if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1386         SetState(GL_RASTERIZER_DISCARD, GL_FALSE);
1387     }
1388     {
1389         // NOTE: clear is not yet optimal. depth, stencil and color should be cleared using ONE glClear call if
1390         // possible. (ie. all buffers at once)
1391         renderingToDefaultFbo_ = false;
1392         if (sb.colorAttachmentCount > 0) {
1393             // collect color attachment infos..
1394             const RenderPassDesc::AttachmentDesc*
1395                 colorAttachments[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1396             for (uint32_t ci = 0; ci < sb.colorAttachmentCount; ci++) {
1397                 uint32_t index = sb.colorAttachmentIndices[ci];
1398                 if (resolveToBackbuffer_[index]) {
1399                     // NOTE: this could fail with multiple color attachments....
1400                     renderingToDefaultFbo_ = true;
1401                 }
1402                 if (!attachmentCleared_[index]) {
1403                     attachmentCleared_[index] = true;
1404                     colorAttachments[ci] = &rpd.attachments[index];
1405                 } else {
1406                     colorAttachments[ci] = nullptr;
1407                 }
1408             }
1409             HandleColorAttachments(array_view(colorAttachments, sb.colorAttachmentCount));
1410         }
1411         if (sb.depthAttachmentCount) {
1412             if (!attachmentCleared_[sb.depthAttachmentIndex]) {
1413                 attachmentCleared_[sb.depthAttachmentIndex] = true;
1414                 HandleDepthAttachment(rpd.attachments[sb.depthAttachmentIndex]);
1415             }
1416         }
1417     }
1418     if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1419         // NOTE: We might not need to restore here.. (we need to peek in to the command list to find out...)
1420         SetState(GL_RASTERIZER_DISCARD, GL_TRUE);
1421     }
1422     ClearScissorReset();
1423 
1424     if (viewportPending_) {
1425         viewportPending_ = false;
1426         // Handle top-left / bottom-left origin conversion
1427         auto y = static_cast<GLint>(viewport_.y);
1428         const auto h = static_cast<GLsizei>(viewport_.height);
1429         if (renderingToDefaultFbo_) {
1430             const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
1431             y = fh - (y + h);
1432         }
1433         glViewport(static_cast<GLint>(viewport_.x), y, static_cast<GLsizei>(viewport_.width), h);
1434     }
1435 }
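// Worked example of the origin conversion above: with a 1080 px tall default framebuffer
// and a top-left-origin viewport of y = 100, height = 400, the bottom-left-origin GL
// viewport y becomes 1080 - (100 + 400) = 580.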
1436 
1437 void RenderBackendGLES::ScanPasses(const RenderPassDesc& rpd)
1438 {
1439     for (uint32_t sub = 0; sub < rpd.subpassCount; sub++) {
1440         const auto& currentSubPass = activeRenderPass_.subpasses[sub];
1441         for (uint32_t ci = 0; ci < currentSubPass.resolveAttachmentCount; ci++) {
1442             const uint32_t resolveTo = currentSubPass.resolveAttachmentIndices[ci];
1443             if (!attachmentImage_[resolveTo]) {
1444                 PLUGIN_LOG_ONCE_E(to_string(resolveTo), "Missing attachment %u", resolveTo);
1445                 continue;
1446             }
1447             if (attachmentFirstUse_[resolveTo] == 0xFFFFFFFF) {
1448                 attachmentFirstUse_[resolveTo] = sub;
1449             }
1450             attachmentLastUse_[resolveTo] = sub;
1451             const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[resolveTo]->GetPlatformData());
1452             if ((p.image == 0) && (p.renderBuffer == 0)) {
1453                 // mark the "resolveFrom" (i.e. the color attachment) as "backbuffer-like", since we resolve to
1454                 // backbuffer...
1455                 uint32_t resolveFrom = currentSubPass.colorAttachmentIndices[ci];
1456                 resolveToBackbuffer_[resolveFrom] = true;
1457             }
1458         }
1459         for (uint32_t ci = 0; ci < currentSubPass.inputAttachmentCount; ci++) {
1460             uint32_t index = currentSubPass.inputAttachmentIndices[ci];
1461             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1462                 attachmentFirstUse_[index] = sub;
1463             }
1464             attachmentLastUse_[index] = sub;
1465         }
1466         for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1467             uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1468             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1469                 attachmentFirstUse_[index] = sub;
1470             }
1471             attachmentLastUse_[index] = sub;
1472             if (attachmentImage_[index]) {
1473                 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[index]->GetPlatformData());
1474                 if ((p.image == 0) && (p.renderBuffer == 0)) {
1475                     resolveToBackbuffer_[index] = true;
1476                 }
1477             }
1478         }
1479         if (currentSubPass.depthAttachmentCount > 0) {
1480             uint32_t index = currentSubPass.depthAttachmentIndex;
1481             if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1482                 attachmentFirstUse_[index] = sub;
1483             }
1484             attachmentLastUse_[index] = sub;
1485         }
1486     }
1487 }
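// Example of the bookkeeping above: an attachment referenced only by subpasses 1 and 3
// ends up with attachmentFirstUse_ == 1 and attachmentLastUse_ == 3, so its contents
// become eligible for invalidation once subpass 3 (its last use) has ended.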
1488 
1489 void RenderBackendGLES::RenderCommandBeginRenderPass(const RenderCommandWithType& ref)
1490 {
1491     PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
1492     const auto& renderCmd = *static_cast<const struct RenderCommandBeginRenderPass*>(ref.rc);
1493     switch (renderCmd.beginType) {
1494         case RenderPassBeginType::RENDER_PASS_BEGIN: {
1495             ++inRenderpass_;
1496             PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES beginrenderpass inRenderpass_ %u", inRenderpass_);
1497             activeRenderPass_ = renderCmd; // Store this because we need it later (in NextRenderPass)
1498 
1499             const auto& rpd = activeRenderPass_.renderPassDesc;
1500             renderArea_ = rpd.renderArea; // can subpasses have different render areas?
1501             auto& cpm = *(static_cast<NodeContextPoolManagerGLES*>(managers_.poolMgr));
1502             if (multisampledRenderToTexture_) {
1503                 cpm.FilterRenderPass(activeRenderPass_);
1504             }
1505             currentFrameBuffer_ = cpm.GetFramebuffer(cpm.GetFramebufferHandle(activeRenderPass_));
1506             if (!currentFrameBuffer_) {
1507                 // Completely invalid state in backend.
1508                 commandListValid_ = false;
1509                 --inRenderpass_;
1510                 return;
1511             }
1512             PLUGIN_ASSERT_MSG(
1513                 activeRenderPass_.subpassStartIndex == 0, "activeRenderPass_.subpassStartIndex != 0 not handled!");
1514             currentSubPass_ = 0;
1515             // find first and last use, clear the clear flags. (this could be cached in the low-level classes)
1516             for (uint32_t i = 0; i < rpd.attachmentCount; i++) {
1517                 attachmentCleared_[i] = false;
1518                 attachmentFirstUse_[i] = 0xFFFFFFFF;
1519                 attachmentLastUse_[i] = 0;
1520                 resolveToBackbuffer_[i] = false;
1521                 attachmentImage_[i] =
1522                     static_cast<const GpuImageGLES*>(gpuResourceMgr_.GetImage(rpd.attachmentHandles[i]));
1523             }
1524             ScanPasses(rpd);
1525             DoSubPass(0);
1526 #if (RENDER_PERF_ENABLED == 1)
1527             ++perfCounters_.renderPassCount;
1528 #endif
1529         } break;
1530 
1531         case RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN: {
1532             currentSubPass_ = renderCmd.subpassStartIndex;
1533             PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1534             if (!currentFrameBuffer_) {
1535                 // Completely invalid state in backend.
1536                 commandListValid_ = false;
1537                 return;
1538             }
1539             DoSubPass(activeRenderPass_.subpassStartIndex);
1540         } break;
1541 
1542         default:
1543             break;
1544     }
1545 }
1546 
1547 void RenderBackendGLES::RenderCommandNextSubpass(const RenderCommandWithType& ref)
1548 {
1549     PLUGIN_ASSERT(ref.type == RenderCommandType::NEXT_SUBPASS);
1550     const auto& renderCmd = *static_cast<const struct RenderCommandNextSubpass*>(ref.rc);
1551     PLUGIN_UNUSED(renderCmd);
1552     PLUGIN_ASSERT(renderCmd.subpassContents == SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
1553     ++currentSubPass_;
1554     PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1555     DoSubPass(currentSubPass_);
1556 }
1557 
1558 int32_t RenderBackendGLES::InvalidateDepthStencil(
1559     array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1560 {
1561     int32_t depthCount = 0;
1562     if (currentSubPass.depthAttachmentCount == 0) {
1563         return depthCount; // early out
1564     }
1565     const uint32_t index = currentSubPass.depthAttachmentIndex;
1566     if (attachmentLastUse_[index] != currentSubPass_) {
1567         return depthCount; // early out
1568     }
1569     // is last use of the attachment
1570     const auto& image = attachmentImage_[index];
1571     const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1572     // NOTE: we expect the depth to be in the FBO in this case, even if the render pass declares a depth target
1573     if ((dplat.image || dplat.renderBuffer) && (!renderingToDefaultFbo_)) {
1574         bool depth = false;
1575         bool stencil = false;
1576         if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1577             if ((dplat.format == GL_DEPTH_COMPONENT) || (dplat.format == GL_DEPTH_STENCIL)) {
1578                 depth = true;
1579             }
1580         }
1581         if (rpd.attachments[index].stencilStoreOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1582             if ((dplat.format == GL_STENCIL) || (dplat.format == GL_DEPTH_STENCIL)) {
1583                 stencil = true;
1584             }
1585         }
1586         if (depth && stencil) {
1587             invalidateAttachment[0] = GL_DEPTH_STENCIL_ATTACHMENT;
1588             depthCount++;
1589         } else if (stencil) {
1590             invalidateAttachment[0] = GL_STENCIL_ATTACHMENT;
1591             depthCount++;
1592         } else if (depth) {
1593             invalidateAttachment[0] = GL_DEPTH_ATTACHMENT;
1594             depthCount++;
1595         }
1596     }
1597     return depthCount;
1598 }
1599 
1600 int32_t RenderBackendGLES::InvalidateColor(
1601     array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1602 {
1603     int32_t colorCount = 0;
1604     // see which parts of the fbo can be invalidated...
1605     // collect color attachment infos..
1606     for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1607         const uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1608         if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1609             if (const auto* image = attachmentImage_[index]) {
1610                 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1611                 if (dplat.image || dplat.renderBuffer) {
1612                     if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1613                         invalidateAttachment[static_cast<size_t>(colorCount)] = GL_COLOR_ATTACHMENT0 + ci;
1614                         colorCount++;
1615                     }
1616                 }
1617             }
1618         }
1619     }
1620     return colorCount;
1621 }
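// For reference (sketch, not engine code): the attachment lists collected by
// InvalidateColor/InvalidateDepthStencil feed the Invalidate() helper defined elsewhere,
// which presumably reduces to:
//
//   if (invalidateCount > 0) {
//       glInvalidateFramebuffer(fbType, invalidateCount, invalidateAttachment);
//   }
//
// telling the driver those attachments may be discarded instead of written back to
// memory, a significant bandwidth saving on tiled GPUs.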
1622 
1623 uint32_t RenderBackendGLES::ResolveMSAA(const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1624 {
1625     const GLbitfield mask = ((currentSubPass.resolveAttachmentCount > 0u) ? GL_COLOR_BUFFER_BIT : 0u) |
1626                             ((currentSubPass.depthResolveAttachmentCount > 0u) ? GL_DEPTH_BUFFER_BIT : 0u);
1627     if (!mask) {
1628         return GL_FRAMEBUFFER;
1629     }
1630 
1631     if (scissorEnabled_) {
1632         glDisable(GL_SCISSOR_TEST);
1633         scissorEnabled_ = false;
1634     }
1635 
1636     // Resolve MSAA buffers.
1637     // NOTE: ARM recommends NOT to use glBlitFramebuffer here
1638     if (!currentSubPass.viewMask) {
1639         device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1640         device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1641 
1642         glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1643             static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1644             static_cast<GLint>(currentFrameBuffer_->height), mask, GL_NEAREST);
1645     } else {
1646         // Layers need to be resolved one by one. Create temporary FBOs and go through the layers.
1647         GLuint frameBuffers[2U]; // 2: buffer size
1648         glGenFramebuffers(2, frameBuffers); // 2: buffer size
1649         device_.BindReadFrameBuffer(frameBuffers[0U]);
1650         device_.BindWriteFrameBuffer(frameBuffers[1U]);
1651 
1652         const auto& srcImage =
1653             gpuResourceMgr_.GetImage(rpd.attachmentHandles[currentSubPass.colorAttachmentIndices[0U]]);
1654         if (srcImage == nullptr) {
1655             return GL_FRAMEBUFFER;
1656         }
1657         const auto& srcPlat = static_cast<const GpuImagePlatformDataGL&>(srcImage->GetBasePlatformData());
1658         const auto& dstImage =
1659             gpuResourceMgr_.GetImage(rpd.attachmentHandles[currentSubPass.resolveAttachmentIndices[0U]]);
1660         if (dstImage == nullptr) {
1661             return GL_FRAMEBUFFER;
1662         }
1663         const auto& dstPlat = static_cast<const GpuImagePlatformDataGL&>(dstImage->GetBasePlatformData());
1664         auto viewMask = currentSubPass.viewMask;
1665         auto layer = 0;
1666         while (viewMask) {
1667             glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcPlat.image, 0, layer);
1668             glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstPlat.image, 0, layer);
1669 
1670             glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1671                 static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1672                 static_cast<GLint>(currentFrameBuffer_->height), mask, GL_NEAREST);
1673             viewMask >>= 1U;
1674             ++layer;
1675         }
1676         glDeleteFramebuffers(2, frameBuffers); // 2: buffer size
1677 
1678         // invalidation expects to find the actual FBOs
1679         device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1680         device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1681     }
1682     return GL_READ_FRAMEBUFFER;
1683 }
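// Note on the multiview path above: viewMask is a bitmask of rendered layers, so e.g. a
// mask of 0b0011 resolves layers 0 and 1, attaching each layer in turn to the temporary
// read/draw framebuffers before blitting.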
1684 
1685 void RenderBackendGLES::RenderCommandEndRenderPass(const RenderCommandWithType& ref)
1686 {
1687     PLUGIN_ASSERT(ref.type == RenderCommandType::END_RENDER_PASS);
1688     const auto& renderCmd = *static_cast<const struct RenderCommandEndRenderPass*>(ref.rc);
1689     if (renderCmd.endType == RenderPassEndType::END_RENDER_PASS) {
1690         PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES endrenderpass inRenderpass_ %u", inRenderpass_);
1691         inRenderpass_--;
1692     }
1693     if (currentFrameBuffer_ == nullptr) {
1694         // Completely invalid state in backend.
1695         return;
1696     }
1697     const auto& rpd = activeRenderPass_.renderPassDesc;
1698     const auto& currentSubPass = activeRenderPass_.subpasses[currentSubPass_];
1699 
1700     // Resolve MSAA
1701     const uint32_t fbType = ResolveMSAA(rpd, currentSubPass);
1702 
1703     // Finally invalidate color and depth..
1704     GLenum invalidate[PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT + 1] = {};
1705     int32_t invalidateCount = InvalidateColor(invalidate, rpd, currentSubPass);
1706     invalidateCount += InvalidateDepthStencil(
1707         array_view(invalidate + invalidateCount, countof(invalidate) - invalidateCount), rpd, currentSubPass);
1708 
1709     // NOTE: all attachments should be the same size AND currentFrameBuffer_->width/height should match that!
1710     Invalidate(fbType, invalidateCount, invalidate, rpd, *currentFrameBuffer_);
1711 
1712     if (inRenderpass_ == 0) {
1713         currentFrameBuffer_ = nullptr;
1714     }
1715 }
1716 
1717 void RenderBackendGLES::RenderCommandBindVertexBuffers(const RenderCommandWithType& ref)
1718 {
1719     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_VERTEX_BUFFERS);
1720     const auto& renderCmd = *static_cast<const struct RenderCommandBindVertexBuffers*>(ref.rc);
1721     PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1722     PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1723     if (!boundGraphicsPipeline_ || !boundShaderProgram_) {
1724         return;
1725     }
1726     vertexAttribBinds_ = renderCmd.vertexBufferCount;
1727     for (size_t i = 0; i < renderCmd.vertexBufferCount; i++) {
1728         const auto& currVb = renderCmd.vertexBuffers[i];
1729         if (const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(currVb.bufferHandle); gpuBuffer) {
1730             const auto& plat = gpuBuffer->GetPlatformData();
1731             uintptr_t offset = currVb.bufferOffset;
1732             offset += plat.currentByteOffset;
1733             vertexAttribBindSlots_[i].id = plat.buffer;
1734             vertexAttribBindSlots_[i].offset = static_cast<intptr_t>(offset);
1735         } else {
1736             vertexAttribBindSlots_[i].id = 0;
1737             vertexAttribBindSlots_[i].offset = 0;
1738         }
1739     }
1740     vertexBufferUpdate_ = true;
1741 }
1742 
1743 void RenderBackendGLES::RenderCommandBindIndexBuffer(const RenderCommandWithType& ref)
1744 {
1745     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_INDEX_BUFFER);
1746     const auto& renderCmd = *static_cast<const struct RenderCommandBindIndexBuffer*>(ref.rc);
1747     if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.indexBuffer.bufferHandle);
1748         gpuBuffer) {
1749         const auto& plat = gpuBuffer->GetPlatformData();
1750         boundIndexBuffer_.offset = renderCmd.indexBuffer.bufferOffset;
1751         boundIndexBuffer_.offset += plat.currentByteOffset;
1752         boundIndexBuffer_.type = renderCmd.indexBuffer.indexType;
1753         boundIndexBuffer_.id = plat.buffer;
1754     }
1755     indexBufferUpdate_ = true;
1756 }
1757 
1758 void RenderBackendGLES::RenderCommandBlitImage(const RenderCommandWithType& ref)
1759 {
1760     PLUGIN_ASSERT(ref.type == RenderCommandType::BLIT_IMAGE);
1761     const auto& renderCmd = *static_cast<const struct RenderCommandBlitImage*>(ref.rc);
1762     const auto* srcImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1763     const auto* dstImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1764     if ((srcImage == nullptr) || (dstImage == nullptr)) {
1765         return;
1766     }
1767     const auto& srcDesc = srcImage->GetDesc();
1768     const auto& srcPlat = srcImage->GetPlatformData();
1769     const auto& dstDesc = dstImage->GetDesc();
1770     const auto& dstPlat = dstImage->GetPlatformData();
1771     const auto& srcRect = renderCmd.imageBlit.srcOffsets;
1772     const auto& dstRect = renderCmd.imageBlit.dstOffsets;
1773     const auto& src = renderCmd.imageBlit.srcSubresource;
1774     const auto& dst = renderCmd.imageBlit.dstSubresource;
1775     const auto srcMipLevel = static_cast<GLint>(src.mipLevel);
1776     const auto dstMipLevel = static_cast<GLint>(dst.mipLevel);
1777     const auto srcSampleCount = static_cast<uint32_t>(srcDesc.sampleCountFlags);
1778     const auto dstSampleCount = static_cast<uint32_t>(dstDesc.sampleCountFlags);
1779     PLUGIN_ASSERT_MSG(src.layerCount == dst.layerCount, "Source and Destination layercounts do not match!");
1780     PLUGIN_ASSERT_MSG(inRenderpass_ == 0, "RenderCommandBlitImage while inRenderPass");
1781     glDisable(GL_SCISSOR_TEST);
1782     scissorEnabled_ = false;
1783     // NOTE: LAYERS! (texture arrays)
1784     device_.BindReadFrameBuffer(blitImageSourceFbo_);
1785     device_.BindWriteFrameBuffer(blitImageDestinationFbo_);
1786     for (uint32_t layer = 0; layer < src.layerCount; layer++) {
1787         const GLenum srcType = GetTarget(srcPlat.type, layer, srcSampleCount);
1788         const GLenum dstType = GetTarget(dstPlat.type, layer, dstSampleCount);
1789         // glFramebufferTextureLayer for array textures....
1790         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, srcPlat.image, srcMipLevel);
1791         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, dstPlat.image, dstMipLevel);
1792         DoBlit(renderCmd.filter, { src.mipLevel, srcRect[0], srcRect[1], srcDesc.height },
1793             { dst.mipLevel, dstRect[0], dstRect[1], dstDesc.height });
1794         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, 0, 0);
1795         glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, 0, 0);
1796     }
1797 }
1798 
1799 void RenderBackendGLES::RenderCommandCopyBuffer(const RenderCommandWithType& ref)
1800 {
1801     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER);
1802     const auto& renderCmd = *static_cast<const struct RenderCommandCopyBuffer*>(ref.rc);
1803     const auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1804     const auto* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.dstHandle);
1805     if (srcGpuBuffer && dstGpuBuffer) {
1806         const auto& srcData = srcGpuBuffer->GetPlatformData();
1807         const auto& dstData = dstGpuBuffer->GetPlatformData();
1808         const auto oldBindR = device_.BoundBuffer(GL_COPY_READ_BUFFER);
1809         const auto oldBindW = device_.BoundBuffer(GL_COPY_WRITE_BUFFER);
1810         device_.BindBuffer(GL_COPY_READ_BUFFER, srcData.buffer);
1811         device_.BindBuffer(GL_COPY_WRITE_BUFFER, dstData.buffer);
1812         glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
1813             static_cast<GLintptr>(renderCmd.bufferCopy.srcOffset),
1814             static_cast<GLintptr>(renderCmd.bufferCopy.dstOffset), static_cast<GLsizeiptr>(renderCmd.bufferCopy.size));
1815         device_.BindBuffer(GL_COPY_READ_BUFFER, oldBindR);
1816         device_.BindBuffer(GL_COPY_WRITE_BUFFER, oldBindW);
1817     }
1818 }
1819 
1820 void RenderBackendGLES::BufferToImageCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1821 {
1822 #if (RENDER_HAS_GLES_BACKEND == 1) && defined(_WIN32)
1823     // use the workaround only for gles backend on windows. (pvr simulator bug)
1824     constexpr const bool usePixelUnpackBuffer = false;
1825 #else
1826     // expect this to work, and the nvidia bug to be fixed.
1827     constexpr const bool usePixelUnpackBuffer = true;
1828 #endif
1829     auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1830     auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1831     if ((srcGpuBuffer == nullptr) || (dstGpuImage == nullptr)) {
1832         return;
1833     }
1834     const auto info = SetupBlit<usePixelUnpackBuffer>(device_, renderCmd.bufferImageCopy, *srcGpuBuffer, *dstGpuImage);
1835     if (info.iPlat.type == GL_TEXTURE_CUBE_MAP) {
1836         BlitCube(device_, info);
1837     } else if (info.iPlat.type == GL_TEXTURE_2D) {
1838         Blit2D(device_, info);
1839     } else if (info.iPlat.type == GL_TEXTURE_2D_ARRAY) {
1840         BlitArray(device_, info);
1841     } else if (info.iPlat.type == GL_TEXTURE_3D) {
1842         Blit3D(device_, info);
1843 #if RENDER_HAS_GLES_BACKEND
1844     } else if (info.iPlat.type == GL_TEXTURE_EXTERNAL_OES) {
1845         PLUGIN_LOG_E("Tried to copy to GL_TEXTURE_EXTERNAL_OES. Ignored!");
1846 #endif
1847     } else {
1848         PLUGIN_ASSERT_MSG(false, "RenderCommandCopyBufferImage unhandled type");
1849     }
1850     FinishBlit<usePixelUnpackBuffer>(device_, *srcGpuBuffer);
1851 }
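// Note: with usePixelUnpackBuffer the source buffer is bound as GL_PIXEL_UNPACK_BUFFER,
// so the glTexSubImage*-style uploads inside the Blit* helpers read straight from buffer
// storage (the data pointer acting as a byte offset); disabling it presumably stages the
// data through client memory, trading an extra copy for compatibility with the
// driver/simulator bugs mentioned above.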
1852 
1853 void RenderBackendGLES::ImageToBufferCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1854 {
1855     const auto& bc = renderCmd.bufferImageCopy;
1856     const auto* srcGpuImage = static_cast<GpuImageGLES*>(gpuResourceMgr_.GetImage(renderCmd.srcHandle));
1857     const auto* dstGpuBuffer = static_cast<GpuBufferGLES*>(gpuResourceMgr_.GetBuffer(renderCmd.dstHandle));
1858     PLUGIN_ASSERT(srcGpuImage);
1859     PLUGIN_ASSERT(dstGpuBuffer);
1860     if ((srcGpuImage == nullptr) || (dstGpuBuffer == nullptr)) {
1861         return;
1862     }
1863     const auto& iPlat = static_cast<const GpuImagePlatformDataGL&>(srcGpuImage->GetPlatformData());
1864     const auto& bPlat = static_cast<const GpuBufferPlatformDataGL&>(dstGpuBuffer->GetPlatformData());
1865     if ((iPlat.type != GL_TEXTURE_CUBE_MAP) && (iPlat.type != GL_TEXTURE_2D)) {
1866         PLUGIN_LOG_E("Unsupported texture type in ImageToBufferCopy %x", iPlat.type);
1867         return;
1868     }
1869     device_.BindReadFrameBuffer(blitImageSourceFbo_);
1870     PLUGIN_ASSERT(bc.imageSubresource.layerCount == 1);
1871     GLenum type = GL_TEXTURE_2D;
1872     if (iPlat.type == GL_TEXTURE_CUBE_MAP) {
1873         type = GetCubeMapTarget(iPlat.type, bc.imageSubresource.baseArrayLayer);
1874     }
1875     // glFramebufferTextureLayer for array textures....
1876     glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, static_cast<GLuint>(iPlat.image),
1877         static_cast<GLint>(bc.imageSubresource.mipLevel));
1878     const Math::UVec2 sPos { bc.imageOffset.width, bc.imageOffset.height };
1879     const Math::UVec2 sExt { bc.imageExtent.width, bc.imageExtent.height };
1880     device_.BindBuffer(GL_PIXEL_PACK_BUFFER, bPlat.buffer);
1881     glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
1882     glPixelStorei(GL_PACK_ALIGNMENT, 1);
1883     uintptr_t dstOffset = bc.bufferOffset + bPlat.currentByteOffset;
1884     glReadnPixels(static_cast<GLint>(sPos.x), static_cast<GLint>(sPos.y), static_cast<GLsizei>(sExt.x),
1885         static_cast<GLsizei>(sExt.y), iPlat.format, static_cast<GLenum>(iPlat.dataType),
1886         static_cast<GLsizei>(bPlat.alignedByteSize), reinterpret_cast<void*>(dstOffset));
1887     device_.BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
1888     glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, 0, 0);
1889 }
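// Note: because GL_PIXEL_PACK_BUFFER is bound during the glReadnPixels call above, its
// final argument is interpreted as a byte offset into the buffer rather than a client
// memory pointer, hence the reinterpret_cast of dstOffset.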
1890 
1891 void RenderBackendGLES::RenderCommandCopyBufferImage(const RenderCommandWithType& ref)
1892 {
1893     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER_IMAGE);
1894     const auto& renderCmd = *static_cast<const struct RenderCommandCopyBufferImage*>(ref.rc);
1895     PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1896     if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1897         BufferToImageCopy(renderCmd);
1898     } else if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1899         ImageToBufferCopy(renderCmd);
1900     }
1901 }
1902 
1903 void RenderBackendGLES::RenderCommandCopyImage(const RenderCommandWithType& ref)
1904 {
1905     PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_IMAGE);
1906     const auto& renderCmd = *static_cast<const struct RenderCommandCopyImage*>(ref.rc);
1907     PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1908     const auto* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1909     const auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1910     if ((srcGpuImage == nullptr) || (dstGpuImage == nullptr)) {
1911         return;
1912     }
1913     const auto& srcDesc = srcGpuImage->GetDesc();
1914     const auto& dstDesc = dstGpuImage->GetDesc();
1915 #if RENDER_VALIDATION_ENABLED
1916     ValidateCopyImage(renderCmd.imageCopy, srcDesc, dstDesc);
1917 #endif
1918     const auto srcMipLevel =
1919         static_cast<GLint>(Math::min(renderCmd.imageCopy.srcSubresource.mipLevel, srcDesc.mipCount - 1));
1920     const auto dstMipLevel =
1921         static_cast<GLint>(Math::min(renderCmd.imageCopy.dstSubresource.mipLevel, dstDesc.mipCount - 1));
1922 
1923     auto sOffset = renderCmd.imageCopy.srcOffset;
1924     auto dOffset = renderCmd.imageCopy.dstOffset;
1925     auto size = renderCmd.imageCopy.extent;
1926 
1927     // clamp negative offsets to zero and adjust extent and other offset accordingly
1928     ClampOffset(sOffset, dOffset, size);
1929     ClampOffset(dOffset, sOffset, size);
1930 
1931     // clamp size to fit src and dst
1932     ClampSize(sOffset, srcDesc, size);
1933     ClampSize(dOffset, dstDesc, size);
1934 
1935     const auto& srcPlatData = srcGpuImage->GetPlatformData();
1936     const auto& dstPlatData = dstGpuImage->GetPlatformData();
1937     glCopyImageSubData(srcPlatData.image, srcPlatData.type, srcMipLevel, sOffset.x, sOffset.y, sOffset.z,
1938         dstPlatData.image, dstPlatData.type, dstMipLevel, dOffset.x, dOffset.y, dOffset.z,
1939         static_cast<GLsizei>(size.width), static_cast<GLsizei>(size.height), static_cast<GLsizei>(size.depth));
1940 }
1941 
1942 void RenderBackendGLES::RenderCommandBarrierPoint(const RenderCommandWithType& ref)
1943 {
1944     PLUGIN_ASSERT(ref.type == RenderCommandType::BARRIER_POINT);
1945     const auto& renderCmd = *static_cast<const struct RenderCommandBarrierPoint*>(ref.rc);
1946     const auto& rbList = *managers_.rbList;
1947     // NOTE: proper flagging of barriers.
1948     const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1949         rbList.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1950     if (!barrierPointBarriers) {
1951         return; // early out
1952     }
1953     const uint32_t barrierListCount = barrierPointBarriers->barrierListCount;
1954     const auto* nextBarrierList = barrierPointBarriers->firstBarrierList;
1955     GLbitfield barriers = 0;
1956     GLbitfield barriersByRegion = 0;
1957     for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1958         if (nextBarrierList == nullptr) {
1959             // cannot be null, just a safety
1960             PLUGIN_ASSERT(false);
1961             return;
1962         }
1963         const auto& barrierListRef = *nextBarrierList;
1964         nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1965         const uint32_t barrierCount = barrierListRef.count;
1966 
1967         for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1968             const auto& barrier = barrierListRef.commandBarriers[barrierIdx];
1969 
1970             // check if written by a previous shader as an attachment or storage/image buffer
1971             if (barrier.src.accessFlags & (CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
1972                                               CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
1973                 const auto resourceHandle = barrier.resourceHandle;
1974                 const auto handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1975 
1976                 // barrier by region is between fragment shaders and supports a subset of barriers.
1977                 if ((barrier.src.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) &&
1978                     (barrier.dst.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
1979                     barriersByRegion |= CommonBarrierBits(barrier.dst.accessFlags, handleType);
1980                 } else {
1981                     // check the barriers shared with ByRegion
1982                     barriers |= CommonBarrierBits(barrier.dst.accessFlags, handleType);
1983 
1984                     // the rest are invalid for ByRegion
1985                     if (barrier.dst.accessFlags & CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) {
1986                         barriers |= GL_COMMAND_BARRIER_BIT;
1987                     }
1988                     if (barrier.dst.accessFlags & CORE_ACCESS_INDEX_READ_BIT) {
1989                         barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
1990                     }
1991                     if (barrier.dst.accessFlags & CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
1992                         barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
1993                     }
1994                     // which access flags map to which barrier bits?
1995                     // GL_PIXEL_BUFFER_BARRIER_BIT:
1996                     // - buffer objects via the GL_PIXEL_PACK_BUFFER and GL_PIXEL_UNPACK_BUFFER bindings (via
1997                     // glReadPixels, glTexSubImage1D, etc.)
1998                     // GL_TEXTURE_UPDATE_BARRIER_BIT:
1999                     // - texture updates via glTex(Sub)Image*, glCopyTex(Sub)Image*, glCompressedTex(Sub)Image*,
2000                     // and reads via glGetTexImage
2001                     // GL_BUFFER_UPDATE_BARRIER_BIT:
2002                     // - glBufferSubData, glCopyBufferSubData, or glGetBufferSubData, or buffer object memory
2003                     // mapped by glMapBuffer or glMapBufferRange
2004                     // Do these two cover all memory access (CORE_ACCESS_MEMORY_READ_BIT,
2005                     // CORE_ACCESS_MEMORY_WRITE_BIT)?
2006                     if (barrier.dst.accessFlags & (CORE_ACCESS_TRANSFER_READ_BIT | CORE_ACCESS_TRANSFER_WRITE_BIT |
2007                                                       CORE_ACCESS_HOST_READ_BIT | CORE_ACCESS_HOST_WRITE_BIT)) {
2008                         if (handleType == RenderHandleType::GPU_IMAGE) {
2009                             barriers |= GL_TEXTURE_UPDATE_BARRIER_BIT;
2010                         } else if (handleType == RenderHandleType::GPU_BUFFER) {
2011                             barriers |= GL_BUFFER_UPDATE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT;
2012                         }
2013                     }
2014                     // GL_TRANSFORM_FEEDBACK_BARRIER_BIT is not used at the moment
2015                 }
2016             }
2017         }
2018     }
2019     if (barriers) {
2020         glMemoryBarrier(barriers);
2021     }
2022     if (barriersByRegion) {
2023         // only for fragment-fragment
2024         glMemoryBarrierByRegion(barriersByRegion);
2025     }
2026 }
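// Example of the mapping above: a compute pass that writes an SSBO
// (CORE_ACCESS_SHADER_WRITE_BIT) which a later draw consumes as an indirect argument
// buffer (CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) contributes GL_COMMAND_BARRIER_BIT to
// 'barriers', and the whole barrier point is then flushed with a single glMemoryBarrier.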
2027 
2028 void RenderBackendGLES::UpdateGlobalDescriptorSets()
2029 {
2030     auto& descriptorSetMgr = static_cast<DescriptorSetManagerGles&>(device_.GetDescriptorSetManager());
2031 
2032     // Update global descset if needed
2033     const auto& allDescSets = descriptorSetMgr.GetUpdateDescriptorSetHandles();
2034     if (allDescSets.empty()) {
2035         return;
2036     }
2037     for (const auto& descHandle : allDescSets) {
2038         if (RenderHandleUtil::GetHandleType(descHandle) != RenderHandleType::DESCRIPTOR_SET) {
2039             continue;
2040         }
2041         descriptorSetMgr.UpdateDescriptorSetGpuHandle(descHandle);
2042     }
2043 #if RENDER_HAS_GLES_BACKEND
2044     oesBindingsChanged_ = true;
2045     oesBinds_.clear();
2046 #endif
2047 }
2048 
2049 void RenderBackendGLES::UpdateCommandListDescriptorSets(
2050     const RenderCommandList& renderCommandList, NodeContextDescriptorSetManager& ncdsm)
2051 {
2052     const auto& allDescSets = renderCommandList.GetUpdateDescriptorSetHandles();
2053     if (allDescSets.empty()) {
2054         return;
2055     }
2056     for (const auto& descHandle : allDescSets) {
2057         if (RenderHandleUtil::GetHandleType(descHandle) != RenderHandleType::DESCRIPTOR_SET) {
2058             continue;
2059         }
2060         ncdsm.UpdateDescriptorSetGpuHandle(descHandle);
2061     }
2062 #if RENDER_HAS_GLES_BACKEND
2063     oesBindingsChanged_ = true;
2064     oesBinds_.clear();
2065 #endif
2066 }
2067 
2068 void RenderBackendGLES::RenderCommandBindDescriptorSets(const RenderCommandWithType& ref)
2069 {
2070     PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_DESCRIPTOR_SETS);
2071     if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2072         return;
2073     }
2074     const auto& renderCmd = *static_cast<const struct RenderCommandBindDescriptorSets*>(ref.rc);
2075     PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2076 
2077     const auto lastSet = renderCmd.firstSet + renderCmd.setCount;
2078     if ((renderCmd.firstSet >= PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT) ||
2079         (lastSet > PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT)) {
2080         return;
2081     }
2082     std::copy(renderCmd.descriptorSetHandles + renderCmd.firstSet, renderCmd.descriptorSetHandles + lastSet,
2083         descriptorSetHandles_ + renderCmd.firstSet);
2084     auto* dst = descriptorSetDynamicOffsets_ + renderCmd.firstSet;
2085     for (const auto &src : array_view(renderCmd.descriptorSetDynamicOffsets + renderCmd.firstSet,
2086                                       renderCmd.descriptorSetDynamicOffsets + lastSet)) {
2087         dst->dynamicOffsetCount = src.dynamicOffsetCount;
2088         std::copy(src.dynamicOffsets, src.dynamicOffsets + src.dynamicOffsetCount, dst->dynamicOffsets);
2089         ++dst;
2090     }
2091     firstSet_ = static_cast<uint16_t>(renderCmd.firstSet);
2092     setCount_ = static_cast<uint16_t>(renderCmd.setCount);
2093     descriptorUpdate_ = true;
2094 
2095 #if RENDER_HAS_GLES_BACKEND
2096     oesBinds_.clear();
2097     oesBindingsChanged_ = true;
2098     const auto& ncdsm = *static_cast<NodeContextDescriptorSetManagerGles*>(managers_.descriptorSetMgr);
2099     for (uint32_t set = firstSet_; set < static_cast<uint32_t>(firstSet_ + setCount_); ++set) {
2100         const auto& descHandle = descriptorSetHandles_[set];
2101         if (!ncdsm.HasPlatformConversionBindings(descHandle)) {
2102             continue;
2103         }
2104         const auto& resources = ncdsm.GetResources(descHandle);
2105         for (uint32_t binding = 0U, count = resources.size(); binding < count; ++binding) {
2106             auto& bind = resources[binding];
2107             if ((bind.descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
2108                 (bind.descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
2109                 (bind.descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
2110                 if (bind.resources[0].image.mode & Gles::EXTERNAL_BIT) {
2111                     oesBinds_.push_back(OES_Bind { set, binding });
2112                 }
2113             }
2114         }
2115     }
2116 #endif
2117 }
2118 
2119 void RenderBackendGLES::SetPushConstant(uint32_t program, const Gles::PushConstantReflection& pc, const void* data)
2120 {
2121     const auto location = static_cast<GLint>(pc.location);
2122     // the consts list has been filtered and cleared of unused uniforms.
2123     PLUGIN_ASSERT(location != Gles::INVALID_LOCATION);
2124     GLint count = Math::max(static_cast<GLint>(pc.arraySize), 1);
2125     switch (pc.type) {
2126         case GL_UNSIGNED_INT: {
2127             glProgramUniform1uiv(program, location, count, static_cast<const GLuint*>(data));
2128             break;
2129         }
2130         case GL_FLOAT: {
2131             glProgramUniform1fv(program, location, count, static_cast<const GLfloat*>(data));
2132             break;
2133         }
2134         case GL_FLOAT_VEC2: {
2135             glProgramUniform2fv(program, location, count, static_cast<const GLfloat*>(data));
2136             break;
2137         }
2138         case GL_FLOAT_VEC4: {
2139             glProgramUniform4fv(program, location, count, static_cast<const GLfloat*>(data));
2140             break;
2141         }
2142         case GL_FLOAT_MAT4: {
2143             glProgramUniformMatrix4fv(program, location, count, false, static_cast<const GLfloat*>(data));
2144             break;
2145         }
2146         case GL_UNSIGNED_INT_VEC4: {
2147             glProgramUniform4uiv(program, location, count, static_cast<const GLuint*>(data));
2148             break;
2149         }
2150         default:
2151             PLUGIN_ASSERT_MSG(false, "Unhandled pushconstant variable type");
2152     }
2153 }
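// Note: GL(ES) has no push constants, so they are emulated with plain uniforms through
// the DSA-style glProgramUniform* calls above. For example, a reflected 'vec4 tint[4]'
// at location 3 with arraySize 4 becomes:
//
//   glProgramUniform4fv(program, 3, 4, static_cast<const GLfloat*>(data));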
2154 
2155 void RenderBackendGLES::SetPushConstants(uint32_t program, const array_view<Gles::PushConstantReflection>& consts)
2156 {
2157     if (boundProgram_.setPushConstants) {
2158         boundProgram_.setPushConstants = false;
2159         const auto& renderCmd = boundProgram_.pushConstants;
2160         PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2161         PLUGIN_ASSERT_MSG(renderCmd.pushConstant.byteSize > 0, "PushConstant byteSize is zero!");
2162         PLUGIN_ASSERT_MSG(renderCmd.data, "PushConstant data is nullptr!");
2163         if ((renderCmd.data == nullptr) || (renderCmd.pushConstant.byteSize == 0))
2164             return;
2165         // ASSERT: expecting data is valid
2166         // NOTE: handle rest of the types
2167         for (const auto& pc : consts) {
2168             const size_t offs = pc.offset;
2169             if ((offs + pc.size) > renderCmd.pushConstant.byteSize) {
2170                 PLUGIN_LOG_E(
2171                     "pushConstant data invalid (data for %s is missing [offset:%zu size:%zu] byteSize of data:%u)",
2172                     pc.name.c_str(), pc.offset, pc.size, renderCmd.pushConstant.byteSize);
2173                 continue;
2174             }
2175             /*
2176             NOTE: handle the strides....
2177             consts[i].array_stride;
2178             consts[i].matrix_stride; */
2179             SetPushConstant(program, pc, &renderCmd.data[offs]);
2180         }
2181     }
2182 }
2183 
2184 void RenderBackendGLES::RenderCommandPushConstant(const RenderCommandWithType& ref)
2185 {
2186     PLUGIN_ASSERT(ref.type == RenderCommandType::PUSH_CONSTANT);
2187     if (!boundComputeProgram_ && !boundShaderProgram_) {
2188         return;
2189     }
2190     const auto& renderCmd = *static_cast<const struct RenderCommandPushConstant*>(ref.rc);
2191     if (renderCmd.pushConstant.byteSize > 0) {
2192         PLUGIN_ASSERT(renderCmd.data);
2193         PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2194         boundProgram_.setPushConstants = true;
2195         boundProgram_.pushConstants = renderCmd;
2196         if (boundComputeProgram_) {
2197             const auto& sd =
2198                 static_cast<const GpuComputeProgramPlatformDataGL&>(boundComputeProgram_->GetPlatformData());
2199             SetPushConstants(sd.program, sd.pushConstants);
2200         } else {
2201             const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(boundShaderProgram_->GetPlatformData());
2202             SetPushConstants(sd.program, sd.pushConstants);
2203         }
2204     }
2205 }
2206 
2207 void RenderBackendGLES::RenderCommandClearColorImage(const RenderCommandWithType& ref)
2208 {
2209     PLUGIN_ASSERT(ref.type == RenderCommandType::CLEAR_COLOR_IMAGE);
2210 #if RENDER_HAS_GLES_BACKEND
2211 #if (RENDER_VALIDATION_ENABLED == 1)
2212     PLUGIN_LOG_ONCE_E("RenderBackendGLES::RenderCommandClearColorImage",
2213         "Render command clear color image not supported with GLES. One should implement a higher level path for "
2214         "clearing.");
2215 #endif
2216 #else
2217     const auto& renderCmd = *static_cast<const struct RenderCommandClearColorImage*>(ref.rc);
2218 
2219     const GpuImageGLES* imagePtr = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.handle);
2220     if (imagePtr) {
2221         const GpuImagePlatformDataGL& platImage = imagePtr->GetPlatformData();
2222         // NOTE: mip levels and array layers should be handled separately
2223         for (const auto& subresRef : renderCmd.ranges) {
2224             glClearTexImage(platImage.image,     // texture
2225                 (int32_t)subresRef.baseMipLevel, // level
2226                 platImage.format,                // format
2227                 platImage.dataType,              // type
2228                 &renderCmd.color);               // data
2229         }
2230     }
2231 #endif
2232 }
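// glClearTexImage is GL 4.4+ (ARB_clear_texture) and does not exist in GLES, hence the
// validation log above. A common higher-level fallback clears through a temporary FBO
// attachment instead; a minimal sketch, assuming a color-renderable 2D texture 'tex'
// (note that glClearBufferfv honors the scissor test and color write mask):
//
//     GLuint fbo = 0U;
//     glGenFramebuffers(1, &fbo);
//     glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
//     glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, tex, 0);
//     const float color[4U] = { 0.f, 0.f, 0.f, 0.f };
//     glClearBufferfv(GL_COLOR, 0, color); // clears draw buffer 0 of the bound FBO
//     glDeleteFramebuffers(1, &fbo);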
2233 
2234 // dynamic states
2235 void RenderBackendGLES::RenderCommandDynamicStateViewport(const RenderCommandWithType& ref)
2236 {
2237     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_VIEWPORT);
2238     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateViewport*>(ref.rc);
2239     const ViewportDesc& vd = renderCmd.viewportDesc;
2240     SetViewport(vd);
2241 }
2242 
2243 void RenderBackendGLES::RenderCommandDynamicStateScissor(const RenderCommandWithType& ref)
2244 {
2245     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_SCISSOR);
2246     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateScissor*>(ref.rc);
2247     const ScissorDesc& sd = renderCmd.scissorDesc;
2248     SetScissor(sd);
2249 }
2250 
2251 void RenderBackendGLES::RenderCommandDynamicStateLineWidth(const RenderCommandWithType& ref)
2252 {
2253     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_LINE_WIDTH);
2254     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateLineWidth*>(ref.rc);
2255     if (renderCmd.lineWidth != cacheState_.rasterizationState.lineWidth) {
2256         cacheState_.rasterizationState.lineWidth = renderCmd.lineWidth;
2257         glLineWidth(renderCmd.lineWidth);
2258     }
2259 }
2260 
2261 void RenderBackendGLES::RenderCommandDynamicStateDepthBias(const RenderCommandWithType& ref)
2262 {
2263     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS);
2264     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBias not implemented");
2265 }
2266 
2267 void RenderBackendGLES::RenderCommandDynamicStateBlendConstants(const RenderCommandWithType& ref)
2268 {
2269     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS);
2270     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateBlendConstants not implemented");
2271 }
2272 
2273 void RenderBackendGLES::RenderCommandDynamicStateDepthBounds(const RenderCommandWithType& ref)
2274 {
2275     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS);
2276     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBounds not implemented");
2277 }
2278 
2279 void RenderBackendGLES::SetStencilState(const uint32_t frontFlags, const GraphicsState::StencilOpState& front,
2280     const uint32_t backFlags, const GraphicsState::StencilOpState& back)
2281 {
2282     auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2283     auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2284     const uint32_t FUNCMASK =
2285         (StencilSetFlags::SETCOMPAREOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETREFERENCE);
2286     if (frontFlags & StencilSetFlags::SETWRITEMASK) {
2287         cFront.writeMask = front.writeMask;
2288         glStencilMaskSeparate(GL_FRONT, cFront.writeMask);
2289     }
2290     if (frontFlags & FUNCMASK) {
2291         SetStencilCompareOp(cFront, front);
2292         glStencilFuncSeparate(
2293             GL_FRONT, GetCompareOp(cFront.compareOp), static_cast<GLint>(cFront.reference), cFront.compareMask);
2294     }
2295     if (frontFlags & StencilSetFlags::SETOP) {
2296         SetStencilOp(cFront, front);
2297         glStencilOpSeparate(
2298             GL_FRONT, GetStencilOp(cFront.failOp), GetStencilOp(cFront.depthFailOp), GetStencilOp(cFront.passOp));
2299     }
2300     if (backFlags & StencilSetFlags::SETWRITEMASK) {
2301         cBack.writeMask = back.writeMask;
2302         glStencilMaskSeparate(GL_BACK, cBack.writeMask);
2303     }
2304     if (backFlags & FUNCMASK) {
2305         SetStencilCompareOp(cBack, back);
2306         glStencilFuncSeparate(
2307             GL_BACK, GetCompareOp(cBack.compareOp), static_cast<GLint>(cBack.reference), cBack.compareMask);
2308     }
2309     if (backFlags & StencilSetFlags::SETOP) {
2310         SetStencilOp(cBack, back);
2311         glStencilOpSeparate(
2312             GL_BACK, GetStencilOp(cBack.failOp), GetStencilOp(cBack.depthFailOp), GetStencilOp(cBack.passOp));
2313     }
2314 }
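// Vulkan-style two-sided stencil maps one-to-one onto the *Separate entry points used above;
// the frontFlags/backFlags bits exist so that only the changed pieces are re-sent to the
// driver. For example, a dynamic front-face reference update reduces to a single call (sketch):
//
//     glStencilFuncSeparate(GL_FRONT, GetCompareOp(cFront.compareOp), newReference, cFront.compareMask);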
2315 
2316 void RenderBackendGLES::RenderCommandDynamicStateStencil(const RenderCommandWithType& ref)
2317 {
2318     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_STENCIL);
2319     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateStencil*>(ref.rc);
2320     auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2321     auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2322     uint32_t setFront = 0;
2323     uint32_t setBack = 0;
2324     if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_FRONT_BIT) {
2325         if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2326             if (renderCmd.mask != cFront.compareMask) {
2327                 cFront.compareMask = renderCmd.mask;
2328                 setFront |= StencilSetFlags::SETCOMPAREMASK;
2329             }
2330         } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2331             if (renderCmd.mask != cFront.writeMask) {
2332                 cFront.writeMask = renderCmd.mask;
2333                 setFront |= StencilSetFlags::SETWRITEMASK;
2334             }
2335         } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2336             if (renderCmd.mask != cFront.reference) {
2337                 cFront.reference = renderCmd.mask;
2338                 setFront |= StencilSetFlags::SETREFERENCE;
2339             }
2340         }
2341     }
2342     if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_BACK_BIT) {
2343         if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2344             if (renderCmd.mask != cBack.compareMask) {
2345                 cBack.compareMask = renderCmd.mask;
2346                 setBack |= StencilSetFlags::SETCOMPAREMASK;
2347             }
2348         } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2349             if (renderCmd.mask != cBack.writeMask) {
2350                 cBack.writeMask = renderCmd.mask;
2351                 setBack |= StencilSetFlags::SETWRITEMASK;
2352             }
2353         } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2354             if (renderCmd.mask != cBack.reference) {
2355                 cBack.reference = renderCmd.mask;
2356                 setBack |= StencilSetFlags::SETREFERENCE;
2357             }
2358         }
2359     }
2360     SetStencilState(setFront, cFront, setBack, cBack);
2361 }
2362 
2363 void RenderBackendGLES::RenderCommandFragmentShadingRate(const RenderCommandWithType& renderCmd)
2364 {
2365 #if (RENDER_VALIDATION_ENABLED == 1)
2366     PLUGIN_LOG_ONCE_I("gles_RenderCommandFragmentShadingRate",
2367         "RENDER_VALIDATION: Fragment shading rate not available with GL(ES) backend.");
2368 #endif
2369 }
2370 
2371 void RenderBackendGLES::RenderCommandExecuteBackendFramePosition(const RenderCommandWithType& renderCmd)
2372 {
2373     PLUGIN_ASSERT_MSG(false, "RenderCommandExecuteBackendFramePosition not implemented");
2374 }
2375 
2376 void RenderBackendGLES::RenderCommandWriteTimestamp(const RenderCommandWithType& renderCmd)
2377 {
2378     PLUGIN_ASSERT_MSG(false, "RenderCommandWriteTimestamp not implemented");
2379 }
2380 
2381 void RenderBackendGLES::BindVertexInputs(
2382     const VertexInputDeclarationData& decldata, const array_view<const int32_t>& vertexInputs)
2383 {
2384     // update bindings for the VAO.
2385     // process with attribute descriptions to only bind the needed vertex buffers
2386     // NOTE: there may be extra bindings in decldata.bindingDescriptions,
2387     // but only the ones needed by the shader are bound.
2388     const uint32_t minBinding = Math::min(vertexAttribBinds_, decldata.attributeDescriptionCount);
2389     for (uint32_t i = 0; i < minBinding; ++i) {
2390         const auto& attributeRef = decldata.attributeDescriptions[i];
2391         const uint32_t location = attributeRef.location;
2392         const uint32_t binding = attributeRef.binding;
2393         // NOTE: we need to bind all the buffers to the correct bindings.
2394         // shader-optimization check (some locations in vertexInputs are not in use)
2395         if ((location != ~0u) && (binding != ~0u) && (vertexInputs[location] != Gles::INVALID_LOCATION)) {
2396             const auto& slot = vertexAttribBindSlots_[binding];
2397             const auto& bindingRef = decldata.bindingDescriptions[binding];
2398             PLUGIN_ASSERT(bindingRef.binding == binding);
2399             // buffer bound to slot, and it's used by the shader.
2400             device_.BindVertexBuffer(binding, slot.id, slot.offset, static_cast<intptr_t>(bindingRef.stride));
2401             /*
2402             core/vulkan
2403             bindingRef.vertexInputRate =  CORE_VERTEX_INPUT_RATE_VERTEX (0)  attribute index advances per vertex
2404             bindingRef.vertexInputRate =  CORE_VERTEX_INPUT_RATE_INSTANCE (1)  attribute index advances per instance
2405 
2406             gl/gles
2407             If divisor is  0, the attributes using the buffer bound to bindingindex advance once per vertex.
2408             If divisor is >0, the attributes advance once per divisor instances of the set(s) of vertices being
2409             rendered.
2410 
2411             so we can directly pass the inputRate as VertexBindingDivisor. (ie. advance once per instance)
2412             ie. enum happens to match and can simply cast.
2413             */
2414             static_assert(CORE_VERTEX_INPUT_RATE_VERTEX == 0 && CORE_VERTEX_INPUT_RATE_INSTANCE == 1);
2415             device_.VertexBindingDivisor(binding, static_cast<uint32_t>(bindingRef.vertexInputRate));
2416         }
2417     }
2418 }
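// The binding loop above maps one-to-one onto the separate vertex-format API of
// GL 4.3 / GLES 3.1; per binding it is equivalent to (names illustrative):
//
//     glBindVertexBuffer(binding, vbo, offsetBytes, strideBytes);
//     glVertexBindingDivisor(binding, inputRate); // 0 = advance per vertex, 1 = per instance
//
// device_.BindVertexBuffer / VertexBindingDivisor presumably add state caching on top of these.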
2419 
2420 void RenderBackendGLES::BindSampler(const array_view<const Gles::Bind::Resource> resources, const Binder& binder,
2421     const array_view<const Slice> descriptorIndex, const array_view<const uint8_t> ids)
2422 {
2423     const auto end = Math::min(resources.size(), descriptorIndex.size());
2424     for (uint32_t index = 0; index < end; ++index) {
2425         const auto& idRange = descriptorIndex[index];
2426         if ((size_t(idRange.index) + idRange.count) > ids.size()) {
2427             continue;
2428         }
2429         const auto samplerId = resources[index].sampler.samplerId;
2430         for (const auto& id : array_view(ids.data() + idRange.index, idRange.count)) {
2431             const auto textureUnit = index + id;
2432 #if (RENDER_PERF_ENABLED == 1)
2433             if (device_.BoundSampler(textureUnit) != samplerId) {
2434                 ++perfCounters_.bindSampler;
2435             }
2436 #endif
2437             device_.BindSampler(textureUnit, samplerId);
2438         }
2439     }
2440 }
2441 
2442 void RenderBackendGLES::BindTexture(array_view<const Gles::Bind::Resource> resources, const Binder& binder,
2443     BASE_NS::array_view<const Slice> descriptorIndex, BASE_NS::array_view<const uint8_t> ids,
2444     DescriptorType descriptorType)
2445 {
2446     const auto end = Math::min(resources.size(), descriptorIndex.size());
2447     for (uint32_t index = 0; index < end; ++index) {
2448         const auto& idRange = descriptorIndex[index];
2449         if ((size_t(idRange.index) + idRange.count) > ids.size()) {
2450             continue;
2451         }
2452         const auto& imgType = resources[index].image;
2453         if (!imgType.image) {
2454             continue;
2455         }
2456         auto& plat = imgType.image->GetPlatformData();
2457         for (const auto& id : array_view(ids.data() + idRange.index, idRange.count)) {
2458             const auto textureUnit = index + id;
2459             uint32_t samplerId = UINT32_MAX;
2460             if (descriptorType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2461                 samplerId = resources[index].sampler.samplerId;
2462             } else if (descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
2463                 samplerId = 0U;
2464             }
2465             if (samplerId != UINT32_MAX) {
2466 #if (RENDER_PERF_ENABLED == 1)
2467                 if (device_.BoundSampler(textureUnit) != samplerId) {
2468                     ++perfCounters_.bindSampler;
2469                 }
2470 #endif
2471                 device_.BindSampler(textureUnit, samplerId);
2472             }
2473             const auto baseLevel =
2474                 (imgType.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? imgType.mipLevel : 0U;
2475             if (descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
2476                 device_.BindImageTexture(
2477                     textureUnit, plat.image, baseLevel, false, 0, imgType.mode & 0xFFFF, plat.internalFormat);
2478             } else {
2479 #if (RENDER_PERF_ENABLED == 1)
2480                 if (device_.BoundTexture(textureUnit, plat.type) != plat.image) {
2481                     ++perfCounters_.bindTexture;
2482                 }
2483 #endif
2484                 device_.BindTexture(textureUnit, plat.type, plat.image);
2485                 // NOTE: the last setting wins; a single resource cannot have different mip levels
2486                 // bound at the same time.
2487                 // Check and update the forced mip level if needed.
2488                 if (plat.mipLevel != imgType.mipLevel) {
2489                     // NOTE: we are actually modifying the texture object bound above
2490                     const_cast<GpuImagePlatformDataGL&>(plat).mipLevel = imgType.mipLevel;
2491                     // either force the defined mip level or use defaults.
2492                     const auto maxLevel = static_cast<GLint>(
2493                         (plat.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? plat.mipLevel : 1000U);
2494                     glTexParameteri(plat.type, GL_TEXTURE_BASE_LEVEL, static_cast<GLint>(baseLevel));
2495                     glTexParameteri(plat.type, GL_TEXTURE_MAX_LEVEL, maxLevel);
2496                 }
2497             }
2498         }
2499     }
2500 }
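// Forcing a mip level via GL_TEXTURE_BASE_LEVEL / GL_TEXTURE_MAX_LEVEL mutates the texture
// object itself rather than the binding point, which is why the const_cast bookkeeping above
// is required. Restoring the GL defaults for a texture would be:
//
//     glTexParameteri(target, GL_TEXTURE_BASE_LEVEL, 0);
//     glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, 1000); // 1000 is the GL initial value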
2501 
2502 void RenderBackendGLES::BindBuffer(array_view<const Gles::Bind::Resource> resources, const Binder& binder,
2503     BASE_NS::array_view<const Slice> descriptorIndex, BASE_NS::array_view<const uint8_t> ids,
2504     DescriptorType descriptorType)
2505 {
2506     uint32_t target = 0U;
2507     if ((descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) ||
2508         (descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC)) {
2509         target = GL_UNIFORM_BUFFER;
2510     } else if ((descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) ||
2511                (descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
2512         target = GL_SHADER_STORAGE_BUFFER;
2513     }
2514     const auto end = Math::min(resources.size(), descriptorIndex.size());
2515     for (uint32_t index = 0; index < end; index++) {
2516         const auto& idRange = descriptorIndex[index];
2517         if ((size_t(idRange.index) + idRange.count) > ids.size()) {
2518             continue;
2519         }
2520         const auto& obj = resources[index];
2521         uint32_t dynOffset = 0U;
2522         if (auto& currentOffsets = descriptorSetDynamicOffsets_[binder.set]; currentOffsets.dynamicOffsetCount) {
2523             auto& currentIndex = dynamicOffsetIndices_[binder.bind];
2524             if (currentIndex < currentOffsets.dynamicOffsetCount) {
2525                 dynOffset = currentOffsets.dynamicOffsets[currentIndex];
2526             } else {
2527                 PLUGIN_LOG_E("dynamic offset index out of range");
2528             }
2529         }
2530 
2531         for (const auto& id : array_view(ids.data() + idRange.index, idRange.count)) {
2532             const auto binding = index + id;
2533 #if (RENDER_PERF_ENABLED == 1)
2534             if (device_.BoundBuffer(target) != obj.buffer.bufferId) {
2535                 ++perfCounters_.bindBuffer;
2536             }
2537 #endif
2538             device_.BindBufferRange(
2539                 target, binding, obj.buffer.bufferId, obj.buffer.offset + dynOffset, obj.buffer.size);
2540         }
2541     }
2542 }
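// Dynamic descriptor offsets are resolved on the CPU and folded into the glBindBufferRange
// offset above. Note that GL constrains these offsets: they must be multiples of the target's
// alignment limit, queryable as in this sketch (UBO shown; SSBOs have their own limit):
//
//     GLint align = 0;
//     glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &align);
//     // offsets passed to glBindBufferRange(GL_UNIFORM_BUFFER, ...) must be multiples of align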
2543 
2544 void RenderBackendGLES::BindResources()
2545 {
2546     if (!descriptorUpdate_) {
2547         return;
2548     }
2549     descriptorUpdate_ = false;
2550     const ResourcesView* shaderBindings = nullptr;
2551     if (boundComputeProgram_) {
2552         shaderBindings =
2553             &static_cast<const GpuComputeProgramPlatformDataGL&>(boundComputeProgram_->GetPlatformData()).resourcesView;
2554     } else if (boundShaderProgram_) {
2555 #if RENDER_HAS_GLES_BACKEND
2556         if (oesBindingsChanged_) {
2557             oesBindingsChanged_ = false;
2558 
2559             // ask for a compatible program from the boundGraphicsPipeline_
2560             auto shader = boundGraphicsPipeline_->GetOESProgram(oesBinds_);
2561             if (!shader) {
2562                 return;
2563             }
2564             if (boundShaderProgram_ != shader) {
2565                 boundShaderProgram_ = shader;
2566                 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
2567                 // Push constants and "fliplocation" uniform (ie. uniform state) should be only updated if changed...
2568                 const uint32_t program = sd.program;
2569 #if (RENDER_PERF_ENABLED == 1)
2570                 if (device_.BoundProgram() != program) {
2571                     ++perfCounters_.bindProgram;
2572                 }
2573 #endif
2574                 device_.UseProgram(program);
2575                 if (sd.flipLocation != Gles::INVALID_LOCATION) {
2576                     const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
2577                     glProgramUniform1fv(program, sd.flipLocation, 1, &flip);
2578                 }
2579             }
2580         }
2581 #endif
2582         shaderBindings =
2583             &static_cast<const GpuShaderProgramPlatformDataGL&>(boundShaderProgram_->GetPlatformData()).resourcesView;
2584     }
2585     if (!shaderBindings) {
2586         return;
2587     }
2588 
2589     const auto& ncdsm = *static_cast<NodeContextDescriptorSetManagerGles*>(managers_.descriptorSetMgr);
2590     uint32_t currentSet = UINT32_MAX;
2591     array_view<const Gles::Bind> descriptorSetResources;
2592     // With some bookkeeping it might be possible to walk only the descriptor sets touched by the
2593     // previous RenderCommandBindDescriptorSets instead of all sets.
2594     for (auto& binder : (shaderBindings->resourceList)) {
2595         // binders are in set - binding order. when set changes get the resources for the current set and gather dynamic
2596         // offsets
2597         if (binder.set != currentSet) {
2598             currentSet = binder.set;
2599             descriptorSetResources = ncdsm.GetResources(descriptorSetHandles_[binder.set]);
2600 
2601             // descriptorSetDynamicOffsets_ are only for dynamic buffers. figure out which index should be used for
2602             // which binding.
2603             dynamicOffsetIndices_.resize(descriptorSetResources.size());
2604             uint32_t index = 0U;
2605             std::transform(descriptorSetResources.cbegin(), descriptorSetResources.cend(),
2606                 dynamicOffsetIndices_.begin(), [&index](const Gles::Bind& bind) {
2607                     if ((bind.descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) ||
2608                         (bind.descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
2609                         return index++;
2610                     }
2611                     return 0U;
2612                 });
2613         }
2614         if (binder.bind >= descriptorSetResources.size()) {
2615             PLUGIN_LOG_W(
2616                 "Descriptor count mismatch: pipeline binding %u, set size %zu", binder.bind, descriptorSetResources.size());
2617             continue;
2618         }
2619         auto& curRes = descriptorSetResources[binder.bind];
2620 #if RENDER_VALIDATION_ENABLED
2621         if (binder.descriptors.count != curRes.resources.size()) {
2622             PLUGIN_LOG_W(
2623                 "Descriptor size mismatch: pipeline expects %u, set has %zu", binder.descriptors.count, curRes.resources.size());
2624         }
2625 
2626         auto descriptorType = curRes.descriptorType;
2627         if (descriptorType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
2628             descriptorType = CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
2629         } else if (descriptorType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2630             descriptorType = CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2631         }
2632         if (binder.type != descriptorType) {
2633             PLUGIN_LOG_W("Descriptor TYPE mismatch: pipeline %x, descriptor set %x", binder.type, descriptorType);
2634         }
2635 #endif
2636         auto descriptorIndex =
2637             array_view(shaderBindings->descriptorIndexIds.data() + binder.descriptors.index, binder.descriptors.count);
2638         switch (curRes.descriptorType) {
2639             case CORE_DESCRIPTOR_TYPE_SAMPLER: {
2640                 BindSampler(curRes.resources, binder, descriptorIndex, shaderBindings->ids);
2641                 break;
2642             }
2643 
2644             case CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2645             case CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2646             case CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2647             case CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
2648                 BindTexture(curRes.resources, binder, descriptorIndex, shaderBindings->ids, curRes.descriptorType);
2649                 break;
2650             }
2651             case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2652             case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2653             case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2654             case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
2655                 BindBuffer(curRes.resources, binder, descriptorIndex, shaderBindings->ids, curRes.descriptorType);
2656                 break;
2657             }
2658             case CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2659             case CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2660             case CORE_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE:
2661             case CORE_DESCRIPTOR_TYPE_MAX_ENUM:
2662                 break;
2663         }
2664     }
2665 }
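// The std::transform above builds a binding -> dynamic-offset-index table: only the *_DYNAMIC
// buffer bindings consume an entry from descriptorSetDynamicOffsets_, mirroring how Vulkan
// consumes pDynamicOffsets in vkCmdBindDescriptorSets (in binding order, dynamic descriptors
// only).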
2666 
2667 void RenderBackendGLES::RenderCommandBeginDebugMarker(const RenderCommandWithType& ref)
2668 {
2669 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
2670     const auto& renderCmd = *static_cast<const struct RenderCommandBeginDebugMarker*>(ref.rc);
2671     glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)renderCmd.name.data());
2672 #endif
2673 }
2674 
2675 void RenderBackendGLES::RenderCommandEndDebugMarker(const RenderCommandWithType&)
2676 {
2677 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
2678     glPopDebugGroup();
2679 #endif
2680 }
2681 
2682 #if (RENDER_PERF_ENABLED == 1)
2683 void RenderBackendGLES::StartFrameTimers(const RenderCommandFrameData& renderCommandFrameData)
2684 {
2685     framePerfCounters_ = {};
2686     for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2687         const string_view& debugName = renderCommandContext.debugName;
2688         if (timers_.count(debugName) == 0) { // new timers
2689 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2690             PerfDataSet& perfDataSet = timers_[debugName];
2691             constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2692             perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryGLES(device_, desc));
2693             perfDataSet.counter = 0u;
2694 #else
2695             timers_.insert({ debugName, {} });
2696 #endif
2697         }
2698     }
2699 }
2700 
2701 void RenderBackendGLES::EndFrameTimers()
2702 {
2703     int64_t fullGpuTime = 0;
2704 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2705     // already in micros
2706     fullGpuTime = fullGpuCounter_;
2707     fullGpuCounter_ = 0;
2708 #endif
2709     if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2710             CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2711         globalPerfData) {
2712         CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RENDER");
2713         perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2714         perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2715         perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2716         perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2717         perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2718         perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2719 
2720         CORE_PROFILER_PLOT("Full_Cpu", static_cast<int64_t>(commonCpuTimers_.full.GetMicroseconds()));
2721         CORE_PROFILER_PLOT("Acquire_Cpu", static_cast<int64_t>(commonCpuTimers_.acquire.GetMicroseconds()));
2722         CORE_PROFILER_PLOT("Execute_Cpu", static_cast<int64_t>(commonCpuTimers_.execute.GetMicroseconds()));
2723         CORE_PROFILER_PLOT("Submit_Cpu", static_cast<int64_t>(commonCpuTimers_.submit.GetMicroseconds()));
2724         CORE_PROFILER_PLOT("Present_Cpu", static_cast<int64_t>(commonCpuTimers_.present.GetMicroseconds()));
2725         CORE_PROFILER_PLOT("Full_Gpu", static_cast<int64_t>(fullGpuTime));
2726     }
2727 
2728     CORE_PROFILER_PLOT("Instance count", static_cast<int64_t>(framePerfCounters_.instanceCount));
2729     CORE_PROFILER_PLOT("Triangle count", static_cast<int64_t>(framePerfCounters_.triangleCount));
2730     CORE_PROFILER_PLOT("Draw count", static_cast<int64_t>(framePerfCounters_.drawCount));
2731     CORE_PROFILER_PLOT("Draw Indirect count", static_cast<int64_t>(framePerfCounters_.drawIndirectCount));
2732     CORE_PROFILER_PLOT("Dispatch count", static_cast<int64_t>(framePerfCounters_.dispatchCount));
2733     CORE_PROFILER_PLOT("Dispatch Indirect count", static_cast<int64_t>(framePerfCounters_.dispatchIndirectCount));
2734     CORE_PROFILER_PLOT("RenderPass count", static_cast<int64_t>(framePerfCounters_.renderPassCount));
2735     CORE_PROFILER_PLOT("Bind program count", static_cast<int64_t>(framePerfCounters_.bindProgram));
2736     CORE_PROFILER_PLOT("Bind sampler count", static_cast<int64_t>(framePerfCounters_.bindSampler));
2737     CORE_PROFILER_PLOT("Bind texture count", static_cast<int64_t>(framePerfCounters_.bindTexture));
2738     CORE_PROFILER_PLOT("Bind buffer count", static_cast<int64_t>(framePerfCounters_.bindBuffer));
2739 }
2740 
2741 void RenderBackendGLES::CopyPerfTimeStamp(const string_view name, PerfDataSet& perfDataSet)
2742 {
2743 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2744     int64_t gpuMicroSeconds = 0;
2745     if (validGpuQueries_) {
2746         GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
2747         PLUGIN_ASSERT(gpuQuery);
2748 
2749         gpuQuery->NextQueryIndex();
2750 
2751         const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
2752         PLUGIN_ASSERT(platData.queryObject);
2753 
2754         GLint disjointOccurred = 0;
2755 #ifdef GL_GPU_DISJOINT_EXT
2756         // Clear disjoint error
2757         glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
2758 #endif
2759         if (!disjointOccurred && (++perfDataSet.counter) > device_.GetCommandBufferingCount()) {
2760             GLuint64 gpuNanoSeconds = 0U;
2761 #ifdef GL_GPU_DISJOINT_EXT
2762             glGetQueryObjectui64vEXT(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2763 #else
2764             glGetQueryObjectui64v(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2765 #endif
2766             static constexpr uint64_t NANOSECONDS_TO_MICROSECONDS = 1000U;
2767             gpuMicroSeconds = static_cast<int64_t>(gpuNanoSeconds / NANOSECONDS_TO_MICROSECONDS);
2768             if (gpuMicroSeconds > UINT32_MAX) {
2769                 gpuMicroSeconds = 0;
2770             }
2771             fullGpuCounter_ += gpuMicroSeconds;
2772         } else if (disjointOccurred) {
2773             PLUGIN_LOG_V("GL_GPU_DISJOINT_EXT disjoint occurred.");
2774         }
2775     }
2776 #endif
2777     const int64_t cpuMicroSeconds = perfDataSet.cpuTimer.GetMicroseconds();
2778 
2779     if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2780             CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2781         globalPerfData) {
2782         CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2783 
2784         perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2785 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2786         perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2787 #endif
2788         perfData->UpdateData(name, "Backend_Count_Triangle", perfCounters_.triangleCount,
2789             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2790         perfData->UpdateData(name, "Backend_Count_InstanceCount", perfCounters_.instanceCount,
2791             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2792         perfData->UpdateData(name, "Backend_Count_Draw", perfCounters_.drawCount,
2793             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2794         perfData->UpdateData(name, "Backend_Count_DrawIndirect", perfCounters_.drawIndirectCount,
2795             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2796         perfData->UpdateData(name, "Backend_Count_Dispatch", perfCounters_.dispatchCount,
2797             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2798         perfData->UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters_.dispatchIndirectCount,
2799             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2800         perfData->UpdateData(name, "Backend_Count_RenderPass", perfCounters_.renderPassCount,
2801             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2802         perfData->UpdateData(name, "Backend_Count_BindProgram", perfCounters_.bindProgram,
2803             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2804         perfData->UpdateData(name, "Backend_Count_BindSample", perfCounters_.bindSampler,
2805             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2806         perfData->UpdateData(name, "Backend_Count_BindTexture", perfCounters_.bindTexture,
2807             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2808         perfData->UpdateData(name, "Backend_Count_BindBuffer", perfCounters_.bindBuffer,
2809             CORE_NS::IPerformanceDataManager::PerformanceTimingData::DataType::COUNT);
2810         framePerfCounters_.drawCount += perfCounters_.drawCount;
2811         framePerfCounters_.drawIndirectCount += perfCounters_.drawIndirectCount;
2812         framePerfCounters_.dispatchCount += perfCounters_.dispatchCount;
2813         framePerfCounters_.dispatchIndirectCount += perfCounters_.dispatchIndirectCount;
2814         framePerfCounters_.renderPassCount += perfCounters_.renderPassCount;
2815         framePerfCounters_.bindProgram += perfCounters_.bindProgram;
2816         framePerfCounters_.bindSampler += perfCounters_.bindSampler;
2817         framePerfCounters_.bindTexture += perfCounters_.bindTexture;
2818         framePerfCounters_.bindBuffer += perfCounters_.bindBuffer;
2819         framePerfCounters_.triangleCount += perfCounters_.triangleCount;
2820         framePerfCounters_.instanceCount += perfCounters_.instanceCount;
2821     }
2822 }
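// On GLES these timings come from EXT_disjoint_timer_query; GL_GPU_DISJOINT_EXT is polled
// because a disjoint event (power state change, preemption) invalidates in-flight query
// results. A minimal timestamp-query lifecycle under that extension, for reference:
//
//     glQueryCounterEXT(query, GL_TIMESTAMP_EXT);                // record a timestamp
//     GLuint64 ns = 0U;
//     glGetQueryObjectui64vEXT(query, GL_QUERY_RESULT_EXT, &ns); // result in nanoseconds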
2823 #endif
2824 
2825 void RenderBackendGLES::PrimeDepthStencilState(const GraphicsState& graphicsState)
2826 {
2827     auto& cDepth = cacheState_.depthStencilState;
2828     cDepth = graphicsState.depthStencilState;
2829     // CORE_DYNAMIC_STATE_DEPTH_BOUNDS NOT SUPPORTED ON GLES. (and not implemented on GL either)
2830     SetState(GL_DEPTH_TEST, cDepth.enableDepthTest);
2831     SetState(GL_STENCIL_TEST, cDepth.enableStencilTest);
2832     glDepthFunc(GetCompareOp(cDepth.depthCompareOp));
2833     glDepthMask((cDepth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE)));
2834     const uint32_t updateAllFlags =
2835         (StencilSetFlags::SETOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETCOMPAREOP |
2836             StencilSetFlags::SETREFERENCE | StencilSetFlags::SETWRITEMASK);
2837     SetStencilState(updateAllFlags, cDepth.frontStencilOpState, updateAllFlags, cDepth.backStencilOpState);
2838 }
2839 
2840 void RenderBackendGLES::PrimeBlendState(const GraphicsState& graphicsState)
2841 {
2842     auto& cBlend = cacheState_.colorBlendState;
2843     cBlend = graphicsState.colorBlendState;
2844     glBlendColor(cBlend.colorBlendConstants[Gles::RED_INDEX], cBlend.colorBlendConstants[Gles::GREEN_INDEX],
2845         cBlend.colorBlendConstants[Gles::BLUE_INDEX], cBlend.colorBlendConstants[Gles::ALPHA_INDEX]);
2846     GLuint maxColorAttachments = 0U;
2847     glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, (GLint*)&maxColorAttachments);
2848     maxColorAttachments = BASE_NS::Math::min(PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT, maxColorAttachments);
2849     for (GLuint i = 0; i < maxColorAttachments; i++) {
2850         const auto& cBlendState = cBlend.colorAttachments[i];
2851         glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2852             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2853             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2854             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2855         if (cBlendState.enableBlend) {
2856             glEnablei(GL_BLEND, i);
2857         } else {
2858             glDisablei(GL_BLEND, i);
2859         }
2860         glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2861             GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2862             GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2863         glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2864     }
2865     // logicops are unsupported on GLES
2866 }
2867 
2868 void RenderBackendGLES::PrimeCache(const GraphicsState& graphicsState) // Forces the graphics state.
2869 {
2870     if (cachePrimed_) {
2871         return;
2872     }
2873     cachePrimed_ = true;
2874     /// GRAPHICSSTATE     inputAssembly
2875     const auto& ia = graphicsState.inputAssembly;
2876     auto& cia = cacheState_.inputAssembly;
2877     cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
2878     SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
2879     topology_ = ia.primitiveTopology;
2880     /// GRAPHICSSTATE     rasterizationState
2881     const auto& rs = graphicsState.rasterizationState;
2882     auto& crs = cacheState_.rasterizationState;
2883     // save, since we need to hack for non fill modes etc ! (possibly need shader help for lines...)
2884     polygonMode_ = rs.polygonMode;
2885     // GL_DEPTH_CLAMP,rs.enableDepthClamp NOT SUPPORTED    CHECK GLES 3.2
2886     crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2887     SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2888     crs.enableDepthBias = rs.enableDepthBias;
2889     SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2890     crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2891     crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
2892     glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
2893     // depthBiasClamp NOT SUPPORTED! CHECK GLES 3.2
2894     // If cull mode Flags change...
2895     crs.cullModeFlags = rs.cullModeFlags;
2896     SetCullMode(crs);
2897     crs.frontFace = rs.frontFace;
2898     SetFrontFace(crs);
2899     crs.lineWidth = rs.lineWidth;
2900     glLineWidth(rs.lineWidth);
2901     PrimeDepthStencilState(graphicsState);
2902     PrimeBlendState(graphicsState);
2903 }
2904 
2905 void RenderBackendGLES::UpdateDepthState(const GraphicsState& graphicsState)
2906 {
2907     const auto& depth = graphicsState.depthStencilState;
2908     auto& cDepth = cacheState_.depthStencilState;
2909     if (depth.enableDepthTest != cDepth.enableDepthTest) {
2910         cDepth.enableDepthTest = depth.enableDepthTest;
2911         SetState(GL_DEPTH_TEST, depth.enableDepthTest);
2912     }
2913     if (depth.depthCompareOp != cDepth.depthCompareOp) {
2914         cDepth.depthCompareOp = depth.depthCompareOp;
2915         glDepthFunc(GetCompareOp(depth.depthCompareOp));
2916     }
2917     if (depth.enableDepthWrite != cDepth.enableDepthWrite) {
2918         cDepth.enableDepthWrite = depth.enableDepthWrite;
2919         glDepthMask(depth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE));
2920     }
2921     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BOUNDS)) {
2922         // CORE_DYNAMIC_STATE_DEPTH_BOUNDS not supported on GLES.
2923     }
2924 }
2925 
2926 void RenderBackendGLES::UpdateStencilState(const GraphicsState& graphicsState)
2927 {
2928     const auto& depth = graphicsState.depthStencilState;
2929     auto& cDepth = cacheState_.depthStencilState;
2930     if (depth.enableStencilTest != cDepth.enableStencilTest) {
2931         cDepth.enableStencilTest = depth.enableStencilTest;
2932         SetState(GL_STENCIL_TEST, depth.enableStencilTest);
2933     }
2934     uint32_t setFront = 0;
2935     uint32_t setBack = 0;
2936     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_REFERENCE)) {
2937         if (cDepth.frontStencilOpState.reference != depth.frontStencilOpState.reference) {
2938             setFront |= StencilSetFlags::SETREFERENCE;
2939         }
2940         if (cDepth.backStencilOpState.reference != depth.backStencilOpState.reference) {
2941             setBack |= StencilSetFlags::SETREFERENCE;
2942         }
2943     }
2944     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
2945         if (cDepth.frontStencilOpState.compareMask != depth.frontStencilOpState.compareMask) {
2946             setFront |= StencilSetFlags::SETCOMPAREMASK;
2947         }
2948         if (cDepth.backStencilOpState.compareMask != depth.backStencilOpState.compareMask) {
2949             setBack |= StencilSetFlags::SETCOMPAREMASK;
2950         }
2951     }
2952     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
2953         if (cDepth.frontStencilOpState.writeMask != depth.frontStencilOpState.writeMask) {
2954             setFront |= StencilSetFlags::SETWRITEMASK;
2955         }
2956         if (cDepth.backStencilOpState.writeMask != depth.backStencilOpState.writeMask) {
2957             setBack |= StencilSetFlags::SETWRITEMASK;
2958         }
2959     }
2960     if (cDepth.frontStencilOpState.compareOp != depth.frontStencilOpState.compareOp) {
2961         setFront |= StencilSetFlags::SETCOMPAREOP;
2962     }
2963     if (cDepth.backStencilOpState.compareOp != depth.backStencilOpState.compareOp) {
2964         setBack |= StencilSetFlags::SETCOMPAREOP;
2965     }
2966     if (!CompareStencilOp(cDepth.frontStencilOpState, depth.frontStencilOpState)) {
2967         setFront |= StencilSetFlags::SETOP;
2968     }
2969     if (!CompareStencilOp(cDepth.backStencilOpState, depth.backStencilOpState)) {
2970         setBack |= StencilSetFlags::SETOP;
2971     }
2972     SetStencilState(setFront, depth.frontStencilOpState, setBack, depth.backStencilOpState);
2973 }
2974 
2975 void RenderBackendGLES::UpdateDepthStencilState(const GraphicsState& graphicsState)
2976 {
2977     UpdateDepthState(graphicsState);
2978     UpdateStencilState(graphicsState);
2979 }
2980 
2981 void RenderBackendGLES::UpdateBlendState(const GraphicsState& graphicsState)
2982 {
2983     const auto& blend = graphicsState.colorBlendState;
2984     auto& cBlend = cacheState_.colorBlendState;
2985     for (GLuint i = 0; i < blend.colorAttachmentCount; i++) {
2986         const auto& blendState = blend.colorAttachments[i];
2987         auto& cBlendState = cBlend.colorAttachments[i];
2988         if (blendState.colorWriteMask != cBlendState.colorWriteMask) {
2989             cBlendState.colorWriteMask = blendState.colorWriteMask;
2990             glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2991                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2992                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2993                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2994         }
2995 
2996         // Check if blend state has changed
2997         bool factorsChanged = false;
2998         bool opsChanged = false;
2999 
3000         if (blendState.enableBlend) {
3001             factorsChanged = !CompareBlendFactors(cBlendState, blendState);
3002             opsChanged = !CompareBlendOps(cBlendState, blendState);
3003         }
3004 
3005         if (blendState.enableBlend == cBlendState.enableBlend && !factorsChanged && !opsChanged) {
3006             continue;
3007         }
3008         cBlendState.enableBlend = blendState.enableBlend;
3009         if (blendState.enableBlend) {
3010             glEnablei(GL_BLEND, i);
3011             if (factorsChanged) {
3012                 SetBlendFactors(cBlendState, blendState);
3013                 glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
3014                     GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
3015                     GetBlendFactor(cBlendState.dstAlphaBlendFactor));
3016             }
3017             if (opsChanged) {
3018                 SetBlendOps(cBlendState, blendState);
3019                 glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
3020             }
3021         } else {
3022             glDisablei(GL_BLEND, i);
3023         }
3024     }
3025     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_BLEND_CONSTANTS)) {
3026         if (!Compare(cBlend.colorBlendConstants, blend.colorBlendConstants)) {
3027             Set(cBlend.colorBlendConstants, blend.colorBlendConstants);
3028             glBlendColor(blend.colorBlendConstants[Gles::RED_INDEX], blend.colorBlendConstants[Gles::GREEN_INDEX],
3029                 blend.colorBlendConstants[Gles::BLUE_INDEX], blend.colorBlendConstants[Gles::ALPHA_INDEX]);
3030         }
3031     }
3032     // logicOps in blend not supported on GLES
3033 }
3034 
3035 void RenderBackendGLES::UpdateRasterizationState(const GraphicsState& graphicsState)
3036 {
3037     const auto& rs = graphicsState.rasterizationState;
3038     auto& crs = cacheState_.rasterizationState;
3039     // save, since we need to hack for non fill modes etc ! (possibly need shader help for lines...)
3040     polygonMode_ = rs.polygonMode;
3041 #if RENDER_HAS_GL_BACKEND
3042     if (rs.polygonMode != crs.polygonMode) {
3043         crs.polygonMode = rs.polygonMode;
3044         SetPolygonMode(rs);
3045     }
3046 #endif
3047     if (rs.enableDepthClamp != crs.enableDepthClamp) {
3048         crs.enableDepthClamp = rs.enableDepthClamp;
3049         // NOT SUPPORTED    (needs an extension)
3050     }
3051     if (rs.enableRasterizerDiscard != crs.enableRasterizerDiscard) {
3052         crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
3053         SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
3054     }
3055     if (rs.enableDepthBias != crs.enableDepthBias) {
3056         crs.enableDepthBias = rs.enableDepthBias;
3057         SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
3058     }
3059     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BIAS)) {
3060         if ((rs.depthBiasConstantFactor != crs.depthBiasConstantFactor) ||
3061             (rs.depthBiasSlopeFactor != crs.depthBiasSlopeFactor)) {
3062             crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
3063             crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
3064             glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
3065         }
3066         // depthBiasClamp NOT SUPPORTED    (needs an extension)
3067     }
3068     // If cull mode Flags change...
3069     if (rs.cullModeFlags != crs.cullModeFlags) {
3070         crs.cullModeFlags = rs.cullModeFlags;
3071         SetCullMode(crs);
3072     }
3073     auto frontFace = rs.frontFace;
3074     if (!renderingToDefaultFbo_) {
3075         // Flip winding when not rendering to the default framebuffer.
3076         if (frontFace == FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE) {
3077             frontFace = FrontFace::CORE_FRONT_FACE_CLOCKWISE;
3078         } else if (frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE) {
3079             frontFace = FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE;
3080         }
3081     }
3082     if (frontFace != crs.frontFace) {
3083         crs.frontFace = frontFace;
3084         SetFrontFace(crs);
3085     }
3086     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_LINE_WIDTH)) {
3087         if (rs.lineWidth != crs.lineWidth) {
3088             crs.lineWidth = rs.lineWidth;
3089             glLineWidth(rs.lineWidth);
3090         }
3091     }
3092 }
3093 
3094 void RenderBackendGLES::DoGraphicsState(const GraphicsState& graphicsState)
3095 {
3096     /// GRAPHICSSTATE     inputAssembly
3097     const auto& ia = graphicsState.inputAssembly;
3098     if (ia.enablePrimitiveRestart != cacheState_.inputAssembly.enablePrimitiveRestart) {
3099         auto& cia = cacheState_.inputAssembly;
3100         cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
3101         SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
3102     }
3103     topology_ = ia.primitiveTopology;
3104     UpdateRasterizationState(graphicsState);
3105     UpdateDepthStencilState(graphicsState);
3106     UpdateBlendState(graphicsState);
3107 }
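// DoGraphicsState only diffs against cacheState_, so PrimeCache() must have forced the full
// state once beforehand; otherwise the first diff could wrongly skip a glEnable/glDisable
// that the actual GL context still has in its default state.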
3108 
3109 void RenderBackendGLES::SetViewport(const ViewportDesc& vd)
3110 {
3111     const bool viewportPrimed = BASE_NS::exchange(viewportPrimed_, true);
3112     const bool updateV = (!viewportPrimed) || ((vd.x != viewport_.x) || (vd.y != viewport_.y) ||
3113                                                   (vd.width != viewport_.width) || (vd.height != viewport_.height));
3114     const bool updateD =
3115         (!viewportPrimed) || ((vd.minDepth != viewport_.minDepth) || (vd.maxDepth != viewport_.maxDepth));
3116 
3117     if (updateV) {
3118         viewport_.x = vd.x;
3119         viewport_.y = vd.y;
3120         viewport_.width = vd.width;
3121         viewport_.height = vd.height;
3122         // Handle top-left / bottom-left origin conversion
3123         auto y = static_cast<GLint>(vd.y);
3124         const auto h = static_cast<GLsizei>(vd.height);
3125         if (renderingToDefaultFbo_) {
3126             if (currentFrameBuffer_) {
3127                 const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
3128                 y = fh - (y + h);
3129                 glViewport(static_cast<GLint>(vd.x), y, static_cast<GLsizei>(vd.width), h);
3130             } else {
3131                 viewportPending_ = true;
3132             }
3133         } else {
3134             glViewport(static_cast<GLint>(vd.x), y, static_cast<GLsizei>(vd.width), h);
3135         }
3136     }
3137     if (updateD) {
3138         viewport_.minDepth = vd.minDepth;
3139         viewport_.maxDepth = vd.maxDepth;
3140         glDepthRangef(vd.minDepth, vd.maxDepth);
3141     }
3142 }
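// Worked example of the origin conversion above: GL window coordinates are bottom-left based,
// while the incoming ViewportDesc is top-left based. With a 1080 px tall default framebuffer,
// a top-left viewport of (y = 100, height = 200) lands at yGL = 1080 - (100 + 200) = 780.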
3143 
3144 void RenderBackendGLES::SetScissor(const ScissorDesc& sd)
3145 {
3146     // NOTE: scissordesc is in floats?!?
3147     const bool scissorPrimed = BASE_NS::exchange(scissorPrimed_, true);
3148     const bool updateS =
3149         (!scissorPrimed) ||
3150         ((sd.offsetX != scissorBox_.offsetX) || (sd.offsetY != scissorBox_.offsetY) ||
3151             (sd.extentWidth != scissorBox_.extentWidth) || (sd.extentHeight != scissorBox_.extentHeight));
3152     if (updateS) {
3153         scissorBox_ = sd;
3154         // Handle top-left / bottom-left origin conversion
3155         auto y = static_cast<GLint>(sd.offsetY);
3156         const auto h = static_cast<GLsizei>(sd.extentHeight);
3157         if (renderingToDefaultFbo_) {
3158             const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
3159             y = fh - (y + h);
3160         }
3161         glScissor(static_cast<GLint>(sd.offsetX), y, static_cast<GLsizei>(sd.extentWidth), h);
3162     }
3163 }
3164 RENDER_END_NAMESPACE()
3165