/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_backend_gles.h"

#include <algorithm>

#if RENDER_HAS_GLES_BACKEND
#define EGL_EGLEXT_PROTOTYPES
#include <EGL/egl.h>
#include <EGL/eglext.h>
#undef EGL_EGLEXT_PROTOTYPES
#endif

#include <base/containers/fixed_string.h>
#include <render/datastore/render_data_store_render_pods.h> // NodeGraphBackbufferConfiguration...
#include <render/namespace.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/perf/cpu_perf_scope.h>
#include <core/perf/intf_performance_data_manager.h>

#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#endif
#include "device/gpu_resource_manager.h"
#include "gles/device_gles.h"
#include "gles/gl_functions.h"
#include "gles/gpu_buffer_gles.h"
#include "gles/gpu_image_gles.h"
#include "gles/gpu_program_gles.h"
#include "gles/gpu_query_gles.h"
#include "gles/gpu_sampler_gles.h"
#include "gles/gpu_semaphore_gles.h"
#include "gles/node_context_descriptor_set_manager_gles.h"
#include "gles/node_context_pool_manager_gles.h"
#include "gles/pipeline_state_object_gles.h"
#include "gles/render_frame_sync_gles.h"
#include "gles/swapchain_gles.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h" // RenderCommandFrameData
#include "util/log.h"
#include "util/render_frame_util.h"

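// True when every bit of `bit` is set in `value`; the _GL variant yields the equivalent GLboolean.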
#define IS_BIT(value, bit) ((((value) & (bit)) == (bit)) ? true : false)
#define IS_BIT_GL(value, bit) ((((value) & (bit)) == (bit)) ? (GLboolean)GL_TRUE : (GLboolean)GL_FALSE)

using namespace BASE_NS;

// NOTE: implement missing commands, add state caching, and clean up a bit more.
RENDER_BEGIN_NAMESPACE()
namespace Gles {
// Indices to colorBlendConstants
static constexpr uint32_t RED_INDEX = 0;
static constexpr uint32_t GREEN_INDEX = 1;
static constexpr uint32_t BLUE_INDEX = 2;
static constexpr uint32_t ALPHA_INDEX = 3;
static constexpr uint32_t CUBEMAP_LAYERS = 6;
struct Bind {
    DescriptorType descriptorType { CORE_DESCRIPTOR_TYPE_MAX_ENUM };
    struct BufferType {
        uint32_t bufferId;
        uint32_t offset;
        uint32_t size;
    };
    struct ImageType {
        GpuImageGLES* image;
        uint32_t mode;
        uint32_t mipLevel;
    };
    struct SamplerType {
        uint32_t samplerId;
    };
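    // One bound descriptor: a buffer or an image (the union), plus a sampler id that is
    // only meaningful for sampler and combined image sampler descriptors.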
    struct Resource {
        union {
            Bind::BufferType buffer { 0, 0, 0 };
            Bind::ImageType image;
        };
        SamplerType sampler { 0 };
    };
    vector<Resource> resources;
};
} // namespace Gles
namespace {
constexpr RenderHandleType GetRenderHandleType(const DescriptorType descriptorType)
{
    if (descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
        return RenderHandleType::GPU_SAMPLER;
    } else if (((descriptorType >= CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
                   (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE)) ||
               (descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
        return RenderHandleType::GPU_IMAGE;
    } else if ((descriptorType >= CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) &&
               (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
        return RenderHandleType::GPU_BUFFER;
    }
    return RenderHandleType::UNDEFINED;
}

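// Cube map face targets indexed by array layer; the trailing 0 (GL_NONE) guards one-past-the-end access.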
constexpr GLenum LAYER_ID[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
    GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
    GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };

GLenum GetCubeMapTarget(GLenum type, uint32_t layer)
{
    if (type == GL_TEXTURE_CUBE_MAP) {
        PLUGIN_ASSERT_MSG(layer < Gles::CUBEMAP_LAYERS, "Invalid cubemap index %u", layer);
        return LAYER_ID[layer];
    }
    PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
    return GL_NONE;
}

GLenum GetTarget(GLenum type, uint32_t layer, uint32_t sampleCount)
{
    if (type == GL_TEXTURE_2D) {
        if (sampleCount > 1) {
            return GL_TEXTURE_2D_MULTISAMPLE;
        }
        return GL_TEXTURE_2D;
    }
    if (type == GL_TEXTURE_CUBE_MAP) {
        PLUGIN_ASSERT_MSG(sampleCount == 1, "Cubemap texture can't have MSAA");
        return GetCubeMapTarget(type, layer);
    }
    PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
    return GL_NONE;
}

struct BlitArgs {
    uint32_t mipLevel {};
    Size3D rect0 {};
    Size3D rect1 {};
    uint32_t height {};
};

void DoBlit(const Filter filter, const BlitArgs& src, const BlitArgs& dst)
{
    // Handle top-left / bottom-left origin conversion
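    // e.g. with src.height = 256 at mip 0, src.rect0.height = 16 and src.rect1.height = 32,
    // sy becomes 256 - (16 + 32) = 208 in GL's bottom-left coordinates.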
    auto sy = static_cast<GLint>(src.rect0.height);
    const auto sh = static_cast<const GLint>(src.rect1.height);
    const auto sfh = static_cast<GLint>(src.height >> src.mipLevel);
    sy = sfh - (sy + sh);
    auto dy = static_cast<GLint>(dst.rect0.height);
    const auto dh = static_cast<const GLint>(dst.rect1.height);
    const auto dfh = static_cast<GLint>(dst.height >> dst.mipLevel);
    dy = dfh - (dy + dh);
    GLenum glfilter = GL_NEAREST;
    if (filter == CORE_FILTER_NEAREST) {
        glfilter = GL_NEAREST;
    } else if (filter == CORE_FILTER_LINEAR) {
        glfilter = GL_LINEAR;
    } else {
        PLUGIN_ASSERT_MSG(false, "RenderCommandBlitImage Invalid filter mode");
    }
    glBlitFramebuffer(static_cast<GLint>(src.rect0.width), sy, static_cast<GLint>(src.rect1.width), sfh,
        static_cast<GLint>(dst.rect0.width), dy, static_cast<GLint>(dst.rect1.width), dfh, GL_COLOR_BUFFER_BIT,
        glfilter);
}

GLenum GetPrimFromTopology(PrimitiveTopology op)
{
    switch (op) {
        case CORE_PRIMITIVE_TOPOLOGY_POINT_LIST:
            return GL_POINTS;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST:
            return GL_LINES;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP:
            return GL_LINE_STRIP;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
            return GL_TRIANGLES;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
            return GL_TRIANGLE_STRIP;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
            return GL_TRIANGLE_FAN;
#if defined(GL_ES_VERSION_3_2) || defined(GL_VERSION_3_2)
            // The following are valid from GLES 3.2 onwards
        case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
            return GL_LINES_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
            return GL_LINE_STRIP_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
            return GL_TRIANGLES_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
            return GL_TRIANGLE_STRIP_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_PATCH_LIST:
            return GL_PATCHES;
#endif
        default:
            PLUGIN_ASSERT_MSG(false, "Unsupported primitive topology");
            break;
    }
    return GL_POINTS;
}

GLenum GetBlendOp(BlendOp func)
{
    switch (func) {
        case CORE_BLEND_OP_ADD:
            return GL_FUNC_ADD;
        case CORE_BLEND_OP_SUBTRACT:
            return GL_FUNC_SUBTRACT;
        case CORE_BLEND_OP_REVERSE_SUBTRACT:
            return GL_FUNC_REVERSE_SUBTRACT;
        case CORE_BLEND_OP_MIN:
            return GL_MIN;
        case CORE_BLEND_OP_MAX:
            return GL_MAX;
        default:
            break;
    }
    return GL_FUNC_ADD;
}

GLenum GetBlendFactor(BlendFactor factor)
{
    switch (factor) {
        case CORE_BLEND_FACTOR_ZERO:
            return GL_ZERO;
        case CORE_BLEND_FACTOR_ONE:
            return GL_ONE;
        case CORE_BLEND_FACTOR_SRC_COLOR:
            return GL_SRC_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
            return GL_ONE_MINUS_SRC_COLOR;
        case CORE_BLEND_FACTOR_DST_COLOR:
            return GL_DST_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
            return GL_ONE_MINUS_DST_COLOR;
        case CORE_BLEND_FACTOR_SRC_ALPHA:
            return GL_SRC_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
            return GL_ONE_MINUS_SRC_ALPHA;
        case CORE_BLEND_FACTOR_DST_ALPHA:
            return GL_DST_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
            return GL_ONE_MINUS_DST_ALPHA;
        case CORE_BLEND_FACTOR_CONSTANT_COLOR:
            return GL_CONSTANT_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
            return GL_ONE_MINUS_CONSTANT_COLOR;
        case CORE_BLEND_FACTOR_CONSTANT_ALPHA:
            return GL_CONSTANT_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
            return GL_ONE_MINUS_CONSTANT_ALPHA;
        case CORE_BLEND_FACTOR_SRC_ALPHA_SATURATE:
            return GL_SRC_ALPHA_SATURATE;
            // NOTE: check the GLES3.2...
            /* following requires EXT_blend_func_extended (dual source blending) */
        case CORE_BLEND_FACTOR_SRC1_COLOR:
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
        case CORE_BLEND_FACTOR_SRC1_ALPHA:
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
        default:
            break;
    }
    return GL_ONE;
}

GLenum GetCompareOp(CompareOp aOp)
{
    switch (aOp) {
        case CORE_COMPARE_OP_NEVER:
            return GL_NEVER;
        case CORE_COMPARE_OP_LESS:
            return GL_LESS;
        case CORE_COMPARE_OP_EQUAL:
            return GL_EQUAL;
        case CORE_COMPARE_OP_LESS_OR_EQUAL:
            return GL_LEQUAL;
        case CORE_COMPARE_OP_GREATER:
            return GL_GREATER;
        case CORE_COMPARE_OP_NOT_EQUAL:
            return GL_NOTEQUAL;
        case CORE_COMPARE_OP_GREATER_OR_EQUAL:
            return GL_GEQUAL;
        case CORE_COMPARE_OP_ALWAYS:
            return GL_ALWAYS;
        default:
            break;
    }
    return GL_ALWAYS;
}

GLenum GetStencilOp(StencilOp aOp)
{
    switch (aOp) {
        case CORE_STENCIL_OP_KEEP:
            return GL_KEEP;
        case CORE_STENCIL_OP_ZERO:
            return GL_ZERO;
        case CORE_STENCIL_OP_REPLACE:
            return GL_REPLACE;
        case CORE_STENCIL_OP_INCREMENT_AND_CLAMP:
            return GL_INCR;
        case CORE_STENCIL_OP_DECREMENT_AND_CLAMP:
            return GL_DECR;
        case CORE_STENCIL_OP_INVERT:
            return GL_INVERT;
        case CORE_STENCIL_OP_INCREMENT_AND_WRAP:
            return GL_INCR_WRAP;
        case CORE_STENCIL_OP_DECREMENT_AND_WRAP:
            return GL_DECR_WRAP;
        default:
            break;
    }
    return GL_KEEP;
}

void SetState(GLenum type, bool enabled)
{
    if (enabled) {
        glEnable(type);
    } else {
        glDisable(type);
    }
}

void SetCullMode(const GraphicsState::RasterizationState& rs)
{
    SetState(GL_CULL_FACE, (rs.cullModeFlags != CORE_CULL_MODE_NONE));

    switch (rs.cullModeFlags) {
        case CORE_CULL_MODE_FRONT_BIT:
            glCullFace(GL_FRONT);
            break;
        case CORE_CULL_MODE_BACK_BIT:
            glCullFace(GL_BACK);
            break;
        case CORE_CULL_MODE_FRONT_AND_BACK:
            glCullFace(GL_FRONT_AND_BACK);
            break;
        case CORE_CULL_MODE_NONE:
        default:
            break;
    }
}

void SetFrontFace(const GraphicsState::RasterizationState& rs)
{
    switch (rs.frontFace) {
        case CORE_FRONT_FACE_COUNTER_CLOCKWISE:
            glFrontFace(GL_CCW);
            break;
        case CORE_FRONT_FACE_CLOCKWISE:
            glFrontFace(GL_CW);
            break;
        default:
            break;
    }
}

#if RENDER_HAS_GL_BACKEND
void SetPolygonMode(const GraphicsState::RasterizationState& rs)
{
    GLenum mode;
    switch (rs.polygonMode) {
        default:
        case CORE_POLYGON_MODE_FILL:
            mode = GL_FILL;
            break;
        case CORE_POLYGON_MODE_LINE:
            mode = GL_LINE;
            break;
        case CORE_POLYGON_MODE_POINT:
            mode = GL_POINT;
            break;
    }
    glPolygonMode(GL_FRONT_AND_BACK, mode);
}
#endif

void Invalidate(GLenum framebuffer, int32_t count, const GLenum invalidate[], const RenderPassDesc& rpd,
    const LowlevelFramebufferGL& frameBuffer)
{
    if (count > 0) {
        if ((frameBuffer.width == rpd.renderArea.extentWidth) && (frameBuffer.height == rpd.renderArea.extentHeight)) {
            // Invalidate the whole buffer. (attachment sizes match render area)
            glInvalidateFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate);
        } else {
            // invalidate only a part of the render target..
            // NOTE: verify that this works, we might need to flip the Y axis the same way as scissors etc.
            const auto X = static_cast<const GLint>(rpd.renderArea.offsetX);
            const auto Y = static_cast<const GLint>(rpd.renderArea.offsetY);
            const auto W = static_cast<const GLsizei>(rpd.renderArea.extentWidth);
            const auto H = static_cast<const GLsizei>(rpd.renderArea.extentHeight);
            glInvalidateSubFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate, X, Y, W, H);
        }
    }
}

struct BlitData {
    const GpuImagePlatformDataGL& iPlat;
    const GpuImageDesc& imageDesc;
    const BufferImageCopy& bufferImageCopy;
    uintptr_t data { 0 };
    uint64_t size { 0 };
    uint64_t sizeOfData { 0 };
    bool compressed { false };
};

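// The Blit* helpers below upload one BufferImageCopy region per texture kind. The source data is
// assumed to be tightly packed: the read pointer advances by sizeOfData bytes per layer / slice.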
void BlitArray(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
    // NOTE: image offset depth is ignored
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height),
        Math::min(imageSize.z, bufferImageCopy.imageExtent.depth) };
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    if (valid) {
        uintptr_t data = bd.data;
        const uint32_t layerCount = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
        for (uint32_t layer = imageSubresource.baseArrayLayer; layer < layerCount; layer++) {
            const Math::UVec3 offset3D { offset.x, offset.y, layer };
            if (bd.compressed) {
                device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
            } else {
                device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
            }
            data += static_cast<ptrdiff_t>(bd.sizeOfData);
        }
    }
}

void Blit2D(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec2 imageSize { imageDesc.width >> mip, imageDesc.height >> mip };
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec2 extent { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height) };
    PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == 1,
        "RenderCommandCopyBufferImage Texture2D with baseArrayLayer!=0 && layerCount!= 1");
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    const uintptr_t data = bd.data;
    if (valid && bd.compressed) {
        device_.CompressedTexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent,
            iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
    } else if (valid) {
        device_.TexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent, iPlat.format,
            iPlat.dataType, reinterpret_cast<const void*>(data));
    }
}

void Blit3D(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth >> mip };
    const Math::UVec3 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height,
        bufferImageCopy.imageOffset.depth };
    Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height), Math::min(imageSize.z - offset.z, 1U) };
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    if (valid) {
        uintptr_t data = bd.data;
        for (uint32_t slice = 0U; slice < imageSize.z; ++slice) {
            const Math::UVec3 offset3D { offset.x, offset.y, slice };
            if (bd.compressed) {
                device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
            } else {
                device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
            }
            // offsets one slice
            data += static_cast<uintptr_t>(bd.sizeOfData);
        }
    }
}

void BlitCube(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec2 extent { bufferImageCopy.imageExtent.width, bufferImageCopy.imageExtent.height };
    constexpr GLenum faceId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
        GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
        GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
    PLUGIN_UNUSED(Gles::CUBEMAP_LAYERS);
    PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == Gles::CUBEMAP_LAYERS,
        "RenderCommandCopyBufferImage Cubemap with baseArrayLayer!=0 && layerCount!= 6");
    uintptr_t data = bd.data;
    const uint32_t lastLayer = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
    for (uint32_t i = imageSubresource.baseArrayLayer; i < lastLayer; i++) {
        const GLenum face = faceId[i]; // convert layer index to cube map face id.
        if (face == 0) {
            // reached the end of cubemap faces (see faceId), so stop copying.
            break;
        }
        if (bd.compressed) {
            device_.CompressedTexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent,
                iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
        } else {
            device_.TexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent, iPlat.format,
                iPlat.dataType, reinterpret_cast<const void*>(data));
        }
        data += static_cast<uintptr_t>(bd.sizeOfData);
    }
}

template<bool usePixelUnpackBuffer>
BlitData SetupBlit(DeviceGLES& device_, const BufferImageCopy& bufferImageCopy, GpuBufferGLES& srcGpuBuffer,
    const GpuImageGLES& dstGpuImage)
{
    const auto& iPlat = dstGpuImage.GetPlatformData();
    const auto& imageOffset = bufferImageCopy.imageOffset;
    PLUGIN_UNUSED(imageOffset);
    const auto& imageExtent = bufferImageCopy.imageExtent;
    // size is calculated for a single layer / slice
    const uint64_t size = static_cast<uint64_t>(iPlat.bytesperpixel) *
                          static_cast<uint64_t>(bufferImageCopy.bufferImageHeight) *
                          static_cast<uint64_t>(bufferImageCopy.bufferRowLength);
    uintptr_t data = bufferImageCopy.bufferOffset;
    if constexpr (usePixelUnpackBuffer) {
        const auto& plat = srcGpuBuffer.GetPlatformData();
        device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, plat.buffer);
    } else {
        // Use the mapped pointer for glTexSubImage2D; this is a workaround for GL_INVALID_OPERATION on the PVR GLES
        // simulator and a crash with ETC2 textures on NVIDIA..
        data += reinterpret_cast<uintptr_t>(srcGpuBuffer.Map());
    }
    uint64_t sizeOfData = size;
    const auto& compinfo = iPlat.compression;
    if (compinfo.compressed) {
        // how many blocks in width
        const int64_t blockW = (imageExtent.width + (compinfo.blockW - 1)) / compinfo.blockW;
        // how many blocks in height
        const int64_t blockH = (imageExtent.height + (compinfo.blockH - 1)) / compinfo.blockH;
        // size in bytes..
        sizeOfData = static_cast<uint64_t>(((blockW * blockH) * compinfo.bytesperblock));
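        // e.g. a 100x60 region of a 4x4-block format with 8 bytes per block:
        // blockW = 25, blockH = 15, sizeOfData = 25 * 15 * 8 = 3000 bytes.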

        // Warn for partial copies; we do not handle those at the moment.
        if (bufferImageCopy.bufferRowLength != 0) {
            if (bufferImageCopy.bufferRowLength != blockW * compinfo.blockW) {
                PLUGIN_LOG_W("Partial copies of compressed texture data is not currently supported. "
                             "Stride must match image width (with block align). "
                             "bufferImageCopy.bufferRowLength(%d) "
                             "imageExtent.width(%d) ",
                    bufferImageCopy.bufferRowLength, imageExtent.width);
            }
        }
        glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
        glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
    } else {
        glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bufferImageCopy.bufferRowLength));
        glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, static_cast<GLint>(bufferImageCopy.bufferImageHeight));
    }
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1); // Make sure the align is tight.
    return { iPlat, dstGpuImage.GetDesc(), bufferImageCopy, data, size, sizeOfData, compinfo.compressed };
}

template<bool usePixelUnpackBuffer>
void FinishBlit(DeviceGLES& device_, const GpuBufferGLES& srcGpuBuffer)
{
    if constexpr (usePixelUnpackBuffer) {
        device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    } else {
        srcGpuBuffer.Unmap();
    }
}

template<typename T, size_t N>
constexpr bool Compare(const T (&a)[N], const T (&b)[N])
{
    for (size_t i = 0; i < N; i++) {
        if (a[i] != b[i]) {
            return false;
        }
    }
    return true;
}

template<typename T, size_t N>
constexpr bool Set(T (&a)[N], const T (&b)[N])
{
    for (size_t i = 0; i < N; i++) {
        a[i] = b[i];
    }
    return true;
}

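// The Compare* / Set* helpers below support graphics-state caching: the corresponding gl* calls
// are meant to be issued only when Compare* reports a difference, after which Set* updates the cache.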
bool CompareBlendFactors(
    const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    return (a.srcColorBlendFactor == b.srcColorBlendFactor) && (a.srcAlphaBlendFactor == b.srcAlphaBlendFactor) &&
           (a.dstColorBlendFactor == b.dstColorBlendFactor) && (a.dstAlphaBlendFactor == b.dstAlphaBlendFactor);
}

void SetBlendFactors(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    a.srcColorBlendFactor = b.srcColorBlendFactor;
    a.srcAlphaBlendFactor = b.srcAlphaBlendFactor;
    a.dstColorBlendFactor = b.dstColorBlendFactor;
    a.dstAlphaBlendFactor = b.dstAlphaBlendFactor;
}

bool CompareBlendOps(
    const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    return (a.colorBlendOp == b.colorBlendOp) && (a.alphaBlendOp == b.alphaBlendOp);
}

void SetBlendOps(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    a.colorBlendOp = b.colorBlendOp;
    a.alphaBlendOp = b.alphaBlendOp;
}

bool CompareStencilOp(const GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    return (a.failOp == b.failOp) && (a.depthFailOp == b.depthFailOp) && (a.passOp == b.passOp);
}

void SetStencilOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    a.failOp = b.failOp;
    a.depthFailOp = b.depthFailOp;
    a.passOp = b.passOp;
}

void SetStencilCompareOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    a.compareOp = b.compareOp;
    a.compareMask = b.compareMask;
    a.reference = b.reference;
}

#if RENDER_VALIDATION_ENABLED
void ValidateCopyImage(const Offset3D& offset, const Size3D& extent, uint32_t mipLevel, const GpuImageDesc& imageDesc)
{
    if (mipLevel >= imageDesc.mipCount) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage mipLevel must be less than image mipCount.");
    }
    if ((offset.x < 0) || (offset.y < 0) || (offset.z < 0)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset must not be negative.");
    }
    if (((offset.x + extent.width) > imageDesc.width) || ((offset.y + extent.height) > imageDesc.height) ||
        ((offset.z + extent.depth) > imageDesc.depth)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset + extent does not fit in image.");
    }
}

void ValidateCopyImage(const ImageCopy& imageCopy, const GpuImageDesc& srcImageDesc, const GpuImageDesc& dstImageDesc)
{
    ValidateCopyImage(imageCopy.srcOffset, imageCopy.extent, imageCopy.srcSubresource.mipLevel, srcImageDesc);
    ValidateCopyImage(imageCopy.dstOffset, imageCopy.extent, imageCopy.dstSubresource.mipLevel, dstImageDesc);
}
#endif

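// Shifts a negative source offset into range while keeping the copy regions aligned:
// e.g. srcOffset = -4, dstOffset = 0, size = 16 becomes srcOffset = 0, dstOffset = 4, size = 12.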
constexpr void ClampOffset(int32_t& srcOffset, int32_t& dstOffset, uint32_t& size)
{
    if (srcOffset < 0) {
        auto iSize = static_cast<int32_t>(size);
        size = static_cast<uint32_t>(iSize + srcOffset);
        dstOffset -= srcOffset;
        srcOffset = 0;
    }
}

constexpr void ClampOffset(Offset3D& srcOffset, Offset3D& dstOffset, Size3D& size)
{
    ClampOffset(srcOffset.x, dstOffset.x, size.width);
    ClampOffset(srcOffset.y, dstOffset.y, size.height);
    ClampOffset(srcOffset.z, dstOffset.z, size.depth);
}

constexpr void ClampSize(int32_t offset, uint32_t maxSize, uint32_t& size)
{
    if (size > static_cast<uint32_t>(static_cast<int32_t>(maxSize) - offset)) {
        size = static_cast<uint32_t>(static_cast<int32_t>(maxSize) - offset);
    }
}

constexpr void ClampSize(const Offset3D& offset, const GpuImageDesc& desc, Size3D& size)
{
    ClampSize(offset.x, desc.width, size.width);
    ClampSize(offset.y, desc.height, size.height);
    ClampSize(offset.z, desc.depth, size.depth);
}

// helper which covers barriers supported by Barrier and BarrierByRegion
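// e.g. CORE_ACCESS_SHADER_READ_BIT on a GPU_IMAGE maps to
// GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT only.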
constexpr GLbitfield CommonBarrierBits(AccessFlags accessFlags, RenderHandleType resourceType)
{
    GLbitfield barriers = 0;
    if (accessFlags & CORE_ACCESS_UNIFORM_READ_BIT) {
        barriers |= GL_UNIFORM_BARRIER_BIT;
    }
    if (accessFlags & CORE_ACCESS_SHADER_READ_BIT) {
        // shader read covers UBO, SSBO, storage image etc.; use the resource type to limit the options.
        if (resourceType == RenderHandleType::GPU_IMAGE) {
            barriers |= GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        } else if (resourceType == RenderHandleType::GPU_BUFFER) {
            barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
        } else {
            barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
                        GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        }
    }
    if (accessFlags & CORE_ACCESS_SHADER_WRITE_BIT) {
        if (resourceType == RenderHandleType::GPU_IMAGE) {
            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        } else if (resourceType == RenderHandleType::GPU_BUFFER) {
            barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
        } else {
            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
        }
    }
    if (accessFlags & (CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                          CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
        barriers |= GL_FRAMEBUFFER_BARRIER_BIT;
    }
    // GL_ATOMIC_COUNTER_BARRIER_BIT is not used at the moment
    return barriers;
}

constexpr uint32_t GetArrayOffset(
    const DescriptorSetLayoutBindingResourcesHandler& data, const DescriptorSetLayoutBindingResource& res)
{
    const RenderHandleType type = GetRenderHandleType(res.binding.descriptorType);
    if (type == RenderHandleType::GPU_BUFFER) {
        return data.buffers[res.resourceIndex].desc.arrayOffset;
    }
    if (type == RenderHandleType::GPU_IMAGE) {
        return data.images[res.resourceIndex].desc.arrayOffset;
    }
    if (type == RenderHandleType::GPU_SAMPLER) {
        return data.samplers[res.resourceIndex].desc.arrayOffset;
    }
    return 0u;
}
} // namespace

RenderBackendGLES::RenderBackendGLES(Device& device, GpuResourceManager& gpuResourceManager)
    : RenderBackend(), device_(static_cast<DeviceGLES&>(device)), gpuResourceMgr_(gpuResourceManager)
{
#if (RENDER_PERF_ENABLED == 1)
    validGpuQueries_ = false;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();
#if RENDER_HAS_GL_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGL) {
        validGpuQueries_ = true;
    }
#endif
#if RENDER_HAS_GLES_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
        // Check if GL_EXT_disjoint_timer_query is available.
        validGpuQueries_ = device_.HasExtension("GL_EXT_disjoint_timer_query");
    }
#endif
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
#endif // RENDER_PERF_ENABLED
#if RENDER_HAS_GLES_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
        multisampledRenderToTexture_ = device_.HasExtension("GL_EXT_multisampled_render_to_texture2");
    }
#endif
    PLUGIN_ASSERT(device_.IsActive());
    PrimeCache(GraphicsState {}); // Initializes cache.
    glGenFramebuffers(1, &blitImageSourceFbo_);
    glGenFramebuffers(1, &blitImageDestinationFbo_);
#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_D("fbo id >: %u", blitImageSourceFbo_);
    PLUGIN_LOG_D("fbo id >: %u", blitImageDestinationFbo_);
#endif
#if !RENDER_HAS_GLES_BACKEND
    glEnable(GL_PROGRAM_POINT_SIZE);
#endif
}

RenderBackendGLES::~RenderBackendGLES()
{
    PLUGIN_ASSERT(device_.IsActive());
    device_.DeleteFrameBuffer(blitImageSourceFbo_);
    device_.DeleteFrameBuffer(blitImageDestinationFbo_);
}

void RenderBackendGLES::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!backBufferConfig.swapchainData.empty()) {
        if (device_.HasSwapchain()) {
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.Begin();
#endif
            for (const auto& swapchainData : backBufferConfig.swapchainData) {
#if (RENDER_DEV_ENABLED == 1)
                if (swapchainData.config.gpuSemaphoreHandle) {
                    // NOTE: not implemented
                    PLUGIN_LOG_E("NodeGraphBackBufferConfiguration semaphore not signaled");
                }
#endif
                const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapchainData.handle));
                if (swp) {
#if RENDER_GL_FLIP_Y_SWAPCHAIN
                    // Blit and flip our swapchain frame to backbuffer..
                    const auto& sdesc = swp->GetDesc();
                    if (scissorEnabled_) {
                        glDisable(GL_SCISSOR_TEST);
                        scissorEnabled_ = false;
                    }
                    const auto& platSwapchain = swp->GetPlatformData();
                    device_.BindReadFrameBuffer(platSwapchain.fbos[presentationInfo_.swapchainImageIndex]);
                    device_.BindWriteFrameBuffer(0); // FBO 0 is the surface bound to the current context..
                    glBlitFramebuffer(0, 0, (GLint)sdesc.width, (GLint)sdesc.height, 0, (GLint)sdesc.height,
                        (GLint)sdesc.width, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST);
                    device_.BindReadFrameBuffer(0);
#endif
                    device_.SwapBuffers(*swp);
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.End();
#endif
        }
    }
}

void RenderBackendGLES::ResetState()
{
    boundProgram_ = {};
    boundIndexBuffer_ = {};
    vertexAttribBinds_ = 0;
    renderingToDefaultFbo_ = false;
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = nullptr;
    currentPsoHandle_ = {};
    renderArea_ = {};
    activeRenderPass_ = {};
    currentSubPass_ = 0;
    currentFrameBuffer_ = nullptr;
    scissorBoxUpdated_ = viewportDepthRangeUpdated_ = viewportUpdated_ = true;
    inRenderpass_ = 0;
}

void RenderBackendGLES::ResetBindings()
{
    for (auto& b : boundObjects_) {
        b.dirty = true;
    }
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = nullptr;
    currentPsoHandle_ = {};
}

void RenderBackendGLES::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: all command lists are validated before entering here
    PLUGIN_ASSERT(device_.IsActive());
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif
    presentationInfo_ = {};

    if (device_.HasSwapchain() && (!backBufferConfig.swapchainData.empty())) {
        for (const auto& swapData : backBufferConfig.swapchainData) {
            if (const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapData.handle))) {
                presentationInfo_.swapchainImageIndex = swp->GetNextImage();
                const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
                if (presentationInfo_.swapchainImageIndex < swapchainData.imageViewCount) {
                    // remap image to backbuffer
                    const RenderHandle currentSwapchainHandle =
                        swapchainData.imageViews[presentationInfo_.swapchainImageIndex];
                    // special swapchain remapping
                    gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(swapData.handle, currentSwapchainHandle);
                }
            }
        }
    }
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif
    // global begin backend frame
    auto& descriptorSetMgr = (DescriptorSetManagerGles&)device_.GetDescriptorSetManager();
    descriptorSetMgr.BeginBackendFrame();

    // Reset cached state and bindings.
    ResetState();
    for (const auto& ref : renderCommandFrameData.renderCommandContexts) {
        // Reset bindings between command lists..
        ResetBindings();
        RenderSingleCommandList(ref);
    }
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
#endif
    RenderProcessEndCommandLists(renderCommandFrameData, backBufferConfig);
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

void RenderBackendGLES::RenderProcessEndCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (auto* frameSync = static_cast<RenderFrameSyncGLES*>(renderCommandFrameData.renderFrameSync); frameSync) {
        frameSync->GetFrameFence();
    }
    // signal external GPU fences
    if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
        auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
        const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
        PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
        if (externalSignals.size() == externalSemaphores.size()) {
            for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
                // needs to be false
                if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
                    if (const auto* gs = (const GpuSemaphoreGles*)externalSemaphores[sigIdx].get(); gs) {
                        auto& plat = const_cast<GpuSemaphorePlatformDataGles&>(gs->GetPlatformData());
                        // NOTE: currently could create only one GPU sync
#if RENDER_HAS_GLES_BACKEND
                        const auto disp =
                            static_cast<const DevicePlatformDataGLES&>(device_.GetEglState().GetPlatformData())
                                .display;
                        EGLSyncKHR sync = eglCreateSyncKHR(disp, EGL_SYNC_NATIVE_FENCE_ANDROID, nullptr);
                        if (sync == EGL_NO_SYNC_KHR) {
                            PLUGIN_LOG_E("eglCreateSyncKHR fail");
                        }
#elif RENDER_HAS_GL_BACKEND
                        GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
#else
                        uint64_t sync = 0;
                        PLUGIN_LOG_E("no supported backend to create fence");
#endif
                        plat.sync = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sync));
                        externalSignals[sigIdx].gpuSignalResourceHandle = plat.sync;
                        externalSignals[sigIdx].signaled = true;

                        // NOTE: client is expected to add code for the wait with glClientWaitSync(sync, X, 0)
                    }
                }
            }
        }
    }
}

void RenderBackendGLES::RenderCommandUndefined(const RenderCommandWithType& renderCommand)
{
    PLUGIN_ASSERT_MSG(false, "non-valid render command");
}

void RenderBackendGLES::RenderSingleCommandList(const RenderCommandContext& renderCommandCtx)
{
    // these are validated in render graph
    managers_ = { renderCommandCtx.nodeContextPsoMgr, renderCommandCtx.nodeContextPoolMgr,
        renderCommandCtx.nodeContextDescriptorSetMgr, renderCommandCtx.renderBarrierList };

    managers_.poolMgr->BeginBackendFrame();
    managers_.psoMgr->BeginBackendFrame();
#if (RENDER_PERF_ENABLED == 1) || (RENDER_DEBUG_MARKERS_ENABLED == 1)
    const auto& debugName = renderCommandCtx.debugName;
#endif
#if (RENDER_PERF_ENABLED == 1)
    perfCounters_ = {};
    PLUGIN_ASSERT(timers_.count(debugName) == 1);
    PerfDataSet& perfDataSet = timers_[debugName];
    perfDataSet.cpuTimer.Begin();
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (validGpuQueries_) {
#ifdef GL_GPU_DISJOINT_EXT
        /* Clear disjoint error */
        GLint disjointOccurred = 0;
        glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
#endif
        GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
        PLUGIN_ASSERT(gpuQuery);

        const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
        PLUGIN_ASSERT(platData.queryObject);
        glBeginQuery(GL_TIME_ELAPSED_EXT, platData.queryObject);
    }
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
#endif // RENDER_PERF_ENABLED
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)debugName.data());
#endif
    commandListValid_ = true;
    for (const auto& ref : renderCommandCtx.renderCommandList->GetRenderCommands()) {
        PLUGIN_ASSERT(ref.rc);
        if (commandListValid_) {
#if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
            glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)COMMAND_NAMES[(uint32_t)ref.type]);
#endif
            (this->*(COMMAND_HANDLERS[static_cast<uint32_t>(ref.type)]))(ref);
#if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
            glPopDebugGroup();
#endif
        }
    }
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPopDebugGroup();
#endif
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (validGpuQueries_) {
        glEndQuery(GL_TIME_ELAPSED_EXT);
    }
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
    perfDataSet.cpuTimer.End();
    CopyPerfTimeStamp(debugName, perfDataSet);
#endif // RENDER_PERF_ENABLED
}

void RenderBackendGLES::RenderCommandBindPipeline(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_PIPELINE);
    const auto& renderCmd = *static_cast<const struct RenderCommandBindPipeline*>(ref.rc);
    boundProgram_ = {};
    if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE) {
        PLUGIN_ASSERT(currentFrameBuffer_ == nullptr);
        BindComputePipeline(renderCmd);
    } else if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        BindGraphicsPipeline(renderCmd);
    }
    currentPsoHandle_ = renderCmd.psoHandle;
}

void RenderBackendGLES::BindComputePipeline(const struct RenderCommandBindPipeline& renderCmd)
{
    const auto* pso = static_cast<const ComputePipelineStateObjectGLES*>(
        managers_.psoMgr->GetComputePso(renderCmd.psoHandle, nullptr));
    if (pso) {
        const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
        // Setup descriptorset bind cache..
        SetupCache(data.pipelineLayout);
    }
    boundComputePipeline_ = pso;
    boundGraphicsPipeline_ = nullptr;
}

void RenderBackendGLES::SetupCache(const PipelineLayout& pipelineLayout)
{
    // based on pipeline layout. (note that compatible sets should "save state")
    for (uint32_t set = 0; set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++set) {
        // mark unmatching sets dirty (all for now)
        // resize the cache structures.
        const auto& s = pipelineLayout.descriptorSetLayouts[set];
        if (s.set == PipelineLayoutConstants::INVALID_INDEX) {
            boundObjects_[set].dirty = true;
#if RENDER_HAS_GLES_BACKEND
            boundObjects_[set].oesBinds.clear();
#endif
            boundObjects_[set].resources.clear();
            continue;
        }
        PLUGIN_ASSERT(s.set == set);

        uint32_t maxB = 0;
        // NOTE: compatibility optimizations?
        // NOTE: we expect bindings to be sorted.
        if (s.bindings.back().binding == s.bindings.size() - 1U) {
            // since the last binding matches the size, expect it to be continuous.
            maxB = static_cast<uint32_t>(s.bindings.size());
        } else {
            // Sparse binding.
            // NOTE: sparse sets will waste memory here. (see notes in
            // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSetLayoutBinding.html)
            for (const auto& bind : s.bindings) {
                maxB = Math::max(maxB, bind.binding);
            }
            maxB += 1U; // zero based bindings..
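            // e.g. sparse bindings { 0, 2, 5 } give maxB = 6; slots 1, 3 and 4 stay unused.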
1098         }
1099         if (boundObjects_[set].resources.size() != maxB) {
1100             // resource count change.. (so it's dirty then)
1101             boundObjects_[set].dirty = true;
1102 #if RENDER_HAS_GLES_BACKEND
1103             boundObjects_[set].oesBinds.clear();
1104 #endif
1105             boundObjects_[set].resources.clear(); // clear because we don't care what it had before.
1106             boundObjects_[set].resources.resize(maxB);
1107         }
1108 
1109         for (const auto& b : s.bindings) {
1110             auto& o = boundObjects_[set].resources[b.binding];
1111             // ignore b.shaderStageFlags for now.
1112             if ((o.resources.size() != b.descriptorCount) || (o.descriptorType != b.descriptorType)) {
1113                 // mark set dirty, since "not matching"
1114                 o.resources.clear();
1115                 o.resources.resize(b.descriptorCount);
1116                 o.descriptorType = b.descriptorType;
1117                 boundObjects_[set].dirty = true;
1118 #if RENDER_HAS_GLES_BACKEND
1119                 boundObjects_[set].oesBinds.clear();
1120 #endif
1121             }
1122         }
1123     }
1124 }
1125 
BindGraphicsPipeline(const struct RenderCommandBindPipeline & renderCmd)1126 void RenderBackendGLES::BindGraphicsPipeline(const struct RenderCommandBindPipeline& renderCmd)
1127 {
1128     const auto* pso = static_cast<const GraphicsPipelineStateObjectGLES*>(
1129         managers_.psoMgr->GetGraphicsPso(renderCmd.psoHandle, activeRenderPass_.renderPassDesc,
1130             activeRenderPass_.subpasses, activeRenderPass_.subpassStartIndex, 0, nullptr, nullptr));
1131     if (pso) {
1132         const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
1133         dynamicStateFlags_ = data.dynamicStateFlags;
1134         DoGraphicsState(data.graphicsState);
1135         // NOTE: Deprecate (default viewport/scissor should be set from default targets at some point)
1136         if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT)) {
1137             SetViewport(renderArea_, ViewportDesc { 0.0f, 0.0f, static_cast<float>(renderArea_.extentWidth),
1138                                          static_cast<float>(renderArea_.extentHeight), 0.0f, 1.0f });
1139         }
1140         if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_SCISSOR)) {
1141             SetScissor(renderArea_, ScissorDesc { 0, 0, renderArea_.extentWidth, renderArea_.extentHeight });
1142         }
1143         // Setup descriptorset bind cache..
1144         SetupCache(data.pipelineLayout);
1145     }
1146     boundComputePipeline_ = nullptr;
1147     boundGraphicsPipeline_ = pso;
1148 }

void RenderBackendGLES::RenderCommandDraw(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW);
    const auto& renderCmd = *static_cast<struct RenderCommandDraw*>(ref.rc);
    if (!boundGraphicsPipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
    BindResources();
    const auto type = GetPrimFromTopology(topology_);
    const auto instanceCount = static_cast<GLsizei>(renderCmd.instanceCount);
    // firstInstance is not supported yet; the SPIRV-Cross generated uniform "SPIRV_Cross_BaseInstance"
    // would need to be set to renderCmd.firstInstance (see the sketch after this function).
    if (renderCmd.indexCount) {
        uintptr_t offsetp = boundIndexBuffer_.offset;
        GLenum indexType = GL_UNSIGNED_SHORT;
        switch (boundIndexBuffer_.type) {
            case CORE_INDEX_TYPE_UINT16:
                offsetp += renderCmd.firstIndex * sizeof(uint16_t);
                indexType = GL_UNSIGNED_SHORT;
                break;
            case CORE_INDEX_TYPE_UINT32:
                offsetp += renderCmd.firstIndex * sizeof(uint32_t);
                indexType = GL_UNSIGNED_INT;
                break;
            default:
                PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
                break;
        }
        const auto indexCount = static_cast<GLsizei>(renderCmd.indexCount);
        const auto vertexOffset = static_cast<GLsizei>(renderCmd.vertexOffset);
        const void* offset = reinterpret_cast<const void*>(offsetp);
        if (renderCmd.instanceCount > 1) {
            if (vertexOffset) {
                glDrawElementsInstancedBaseVertex(type, indexCount, indexType, offset, instanceCount, vertexOffset);
            } else {
                glDrawElementsInstanced(type, indexCount, indexType, offset, instanceCount);
            }
        } else {
            if (vertexOffset) {
                glDrawElementsBaseVertex(type, indexCount, indexType, offset, vertexOffset);
            } else {
                glDrawElements(type, indexCount, indexType, offset);
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.drawCount;
        perfCounters_.instanceCount += renderCmd.instanceCount;
        perfCounters_.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
#endif
    } else {
        const auto firstVertex = static_cast<GLsizei>(renderCmd.firstVertex);
        const auto vertexCount = static_cast<GLsizei>(renderCmd.vertexCount);
        if (renderCmd.instanceCount > 1) {
            glDrawArraysInstanced(type, firstVertex, vertexCount, instanceCount);
        } else {
            glDrawArrays(type, firstVertex, vertexCount);
        }
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.drawCount;
        perfCounters_.instanceCount += renderCmd.instanceCount;
        perfCounters_.triangleCount += renderCmd.vertexCount * renderCmd.instanceCount; // count vertices, matching the indexed path above
#endif
    }
}
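// A minimal sketch of the firstInstance workaround mentioned above, assuming the pipeline's
// GL program object is at hand; the uniform name comes from SPIRV-Cross, but the location
// lookup and the 'program' variable here are illustrative only.
#if 0
const GLint baseInstanceLoc = glGetUniformLocation(program, "SPIRV_Cross_BaseInstance");
if (baseInstanceLoc != -1) { // present only if the shader actually uses the base instance
    glProgramUniform1i(program, baseInstanceLoc, static_cast<GLint>(renderCmd.firstInstance));
}
#endif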

void RenderBackendGLES::RenderCommandDrawIndirect(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW_INDIRECT);
    const auto& renderCmd = *static_cast<const struct RenderCommandDrawIndirect*>(ref.rc);
    if (!boundGraphicsPipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
    BindResources();
    if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
        const auto& plat = gpuBuffer->GetPlatformData();
        device_.BindBuffer(GL_DRAW_INDIRECT_BUFFER, plat.buffer);
        const auto type = GetPrimFromTopology(topology_);
        auto offset = static_cast<GLintptr>(renderCmd.offset);
        if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
            GLenum indexType = GL_UNSIGNED_SHORT;
            switch (boundIndexBuffer_.type) {
                case CORE_INDEX_TYPE_UINT16:
                    indexType = GL_UNSIGNED_SHORT;
                    break;
                case CORE_INDEX_TYPE_UINT32:
                    indexType = GL_UNSIGNED_INT;
                    break;
                default:
                    PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
                    break;
            }
            for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
                glDrawElementsIndirect(type, indexType, reinterpret_cast<const void*>(offset));
                offset += renderCmd.stride;
            }
        } else {
            for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
                glDrawArraysIndirect(type, reinterpret_cast<const void*>(offset));
                offset += renderCmd.stride;
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        perfCounters_.drawIndirectCount += renderCmd.drawCount;
#endif
    }
}
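// GLES 3.1 has no glMultiDrawElementsIndirect, which is why the loops above issue one
// glDraw*Indirect call per record and advance 'offset' by 'stride'. For reference, the
// records read from GL_DRAW_INDIRECT_BUFFER are laid out like this (per the GL spec; in
// GLES the last field is reserved and must be zero, on desktop GL it is the base instance):
#if 0
typedef struct {
    GLuint count;              // indices per draw
    GLuint instanceCount;
    GLuint firstIndex;
    GLint baseVertex;
    GLuint reservedMustBeZero; // baseInstance on desktop GL
} DrawElementsIndirectCommand;

typedef struct {
    GLuint count;              // vertices per draw
    GLuint instanceCount;
    GLuint first;
    GLuint reservedMustBeZero;
} DrawArraysIndirectCommand;
#endif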

void RenderBackendGLES::RenderCommandDispatch(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH);
    const auto& renderCmd = *static_cast<const struct RenderCommandDispatch*>(ref.rc);
    if (!boundComputePipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
    BindResources();
    glDispatchCompute(renderCmd.groupCountX, renderCmd.groupCountY, renderCmd.groupCountZ);
#if (RENDER_PERF_ENABLED == 1)
    ++perfCounters_.dispatchCount;
#endif
}

void RenderBackendGLES::RenderCommandDispatchIndirect(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH_INDIRECT);
    const auto& renderCmd = *static_cast<const struct RenderCommandDispatchIndirect*>(ref.rc);
    if (!boundComputePipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
    BindResources();
    if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
        const auto& plat = gpuBuffer->GetPlatformData();
        device_.BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, plat.buffer);
        glDispatchComputeIndirect(static_cast<GLintptr>(renderCmd.offset));
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.dispatchIndirectCount;
#endif
    }
}
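// For reference: glDispatchComputeIndirect reads three tightly packed GLuints at 'offset'
// in the buffer bound to GL_DISPATCH_INDIRECT_BUFFER (per the GL spec):
#if 0
typedef struct {
    GLuint num_groups_x;
    GLuint num_groups_y;
    GLuint num_groups_z;
} DispatchIndirectCommand;
#endif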

void RenderBackendGLES::ClearScissorInit(const RenderPassDesc::RenderArea& aArea)
{
    resetScissor_ = false;           // need to reset the scissor state after the clear?
    clearScissorSet_ = true;         // need to set up the clear scissors before the clear?
    clearScissor_ = aArea;           // area to be cleared
    if (scissorPrimed_) {            // have the scissors been set yet?
        if ((!scissorBoxUpdated_) && // if there is a pending scissor change, ignore the scissor box.
            (clearScissor_.offsetX == scissorBox_.offsetX) && (clearScissor_.offsetY == scissorBox_.offsetY) &&
            (clearScissor_.extentWidth == scissorBox_.extentWidth) &&
            (clearScissor_.extentHeight == scissorBox_.extentHeight)) {
            // The current scissors match the clear-scissor area, so there is no need to set it again.
            clearScissorSet_ = false;
        }
    }
}

void RenderBackendGLES::ClearScissorSet()
{
    if (clearScissorSet_) {       // do we need to set the clear scissors?
        clearScissorSet_ = false; // the clear scissors have been set now.
        resetScissor_ = true;     // we are modifying the scissors, so remember to reset them afterwards.
        glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
            static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
    }
}

void RenderBackendGLES::ClearScissorReset()
{
    if (resetScissor_) { // need to reset the correct scissors?
        if (!scissorPrimed_) {
            // The scissors have not been set yet, so use the clear box as the current cache state
            // (and don't change the scissor setting).
            scissorPrimed_ = true;
            scissorBox_.offsetX = clearScissor_.offsetX;
            scissorBox_.offsetY = clearScissor_.offsetY;
            scissorBox_.extentHeight = clearScissor_.extentHeight;
            scissorBox_.extentWidth = clearScissor_.extentWidth;
        } else {
            // Restore the scissor box to the cached state. (Update the scissors when needed,
            // since clearBox != scissorBox.)
            scissorBoxUpdated_ = true; // i.e. request an update of the scissor state.
        }
    }
}
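// The three helpers above form one lazy clear-scissor sequence per subpass; a minimal usage
// sketch (this mirrors what DoSubPass and the attachment clear helpers below actually do):
#if 0
ClearScissorInit(renderArea_); // decide whether the scissor box must change for the clears
ClearScissorSet();             // invoked by the clear helpers just before glClearBuffer*
ClearScissorReset();           // afterwards, re-prime or re-request the cached scissor state
#endif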

void RenderBackendGLES::HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc*> colorAttachments)
{
    constexpr ColorComponentFlags clearAll = CORE_COLOR_COMPONENT_R_BIT | CORE_COLOR_COMPONENT_G_BIT |
                                             CORE_COLOR_COMPONENT_B_BIT | CORE_COLOR_COMPONENT_A_BIT;
    const auto& cBlend = cacheState_.colorBlendState;
    for (uint32_t idx = 0; idx < colorAttachments.size(); ++idx) {
        if (colorAttachments[idx] == nullptr) {
            continue;
        }
        const auto& ref = *(colorAttachments[idx]);
        if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
            const auto& cBlendState = cBlend.colorAttachments[idx];
            if (clearAll != cBlendState.colorWriteMask) {
                glColorMaski(idx, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
            }
            ClearScissorSet();
            // NOTE: glClearBufferfv is only correct for float/normalized formats; integer formats
            // would need glClearBufferiv/glClearBufferuiv (see the sketch after this function).
            glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
            if (clearAll != cBlendState.colorWriteMask) {
                // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
                glColorMaski(idx, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
                    IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
                    IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
                    IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
            }
        }
    }
}
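// A minimal sketch of format-aware clearing, assuming the clear-value union also carries
// int32/uint32 views (as the Vulkan-style VkClearColorValue does); 'drawBuffer' stands in
// for the attachment index and is an assumption of this sketch:
#if 0
glClearBufferfv(GL_COLOR, drawBuffer, ref.clearValue.color.float32); // float / normalized formats
glClearBufferiv(GL_COLOR, drawBuffer, ref.clearValue.color.int32);   // signed integer formats
glClearBufferuiv(GL_COLOR, drawBuffer, ref.clearValue.color.uint32); // unsigned integer formats
#endif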

void RenderBackendGLES::HandleDepthAttachment(const RenderPassDesc::AttachmentDesc& depthAttachment)
{
    const GLuint allBits = 0xFFFFFFFFu;
    const auto& ref = depthAttachment;
    const bool clearDepth = (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
    const bool clearStencil = (ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
    // Change the state if needed.
    if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
        glDepthMask(GL_TRUE);
    }
    if (clearStencil) {
        if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
            glStencilMaskSeparate(GL_FRONT, allBits);
        }
        if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
            glStencilMaskSeparate(GL_BACK, allBits);
        }
    }
    if (clearDepth || clearStencil) {
        // Set the scissors for the clear..
        ClearScissorSet();
    }
    // Do the clears.
    if (clearDepth && clearStencil) {
        glClearBufferfi(GL_DEPTH_STENCIL, 0, ref.clearValue.depthStencil.depth,
            static_cast<GLint>(ref.clearValue.depthStencil.stencil));
    } else if (clearDepth) {
        glClearBufferfv(GL_DEPTH, 0, &ref.clearValue.depthStencil.depth);
    } else if (clearStencil) {
        glClearBufferiv(GL_STENCIL, 0, reinterpret_cast<const GLint*>(&ref.clearValue.depthStencil.stencil));
    }

    // Restore the cached state, if we touched the state.
    if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
        // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
        glDepthMask(GL_FALSE);
    }
    if (clearStencil) {
        // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
        if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
            glStencilMaskSeparate(GL_FRONT, cacheState_.depthStencilState.frontStencilOpState.writeMask);
        }
        if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
            glStencilMaskSeparate(GL_BACK, cacheState_.depthStencilState.backStencilOpState.writeMask);
        }
    }
}

void RenderBackendGLES::DoSubPass(uint32_t subPass)
{
    if (currentFrameBuffer_ == nullptr) {
        // Completely invalid state in backend.
        return;
    }
    const auto& rpd = activeRenderPass_.renderPassDesc;
    const auto& sb = activeRenderPass_.subpasses[subPass];

    // If there's no FBO, activate with the swapchain handle so that drawing happens to the correct surface.
    if (!currentFrameBuffer_->fbos[subPass].fbo && (sb.colorAttachmentCount == 1U)) {
        auto color = rpd.attachmentHandles[sb.colorAttachmentIndices[0]];
        device_.Activate(color);
    }
    device_.BindFrameBuffer(currentFrameBuffer_->fbos[subPass].fbo);
    ClearScissorInit(renderArea_);
    if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
        SetState(GL_RASTERIZER_DISCARD, GL_FALSE);
    }
    {
        // NOTE: the clear is not yet optimal. Depth, stencil and color should be cleared using ONE
        // glClear call if possible (i.e. all buffers at once).
        renderingToDefaultFbo_ = false;
        if (sb.colorAttachmentCount > 0) {
            // Collect the color attachment infos..
            const RenderPassDesc::AttachmentDesc*
                colorAttachments[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
            for (uint32_t ci = 0; ci < sb.colorAttachmentCount; ci++) {
                uint32_t index = sb.colorAttachmentIndices[ci];
                if (resolveToBackbuffer_[index]) {
                    // NOTE: this could fail with multiple color attachments....
                    renderingToDefaultFbo_ = true;
                }
                if (!attachmentCleared_[index]) {
                    attachmentCleared_[index] = true;
                    colorAttachments[ci] = &rpd.attachments[index];
                } else {
                    colorAttachments[ci] = nullptr;
                }
            }
            HandleColorAttachments(array_view(colorAttachments, sb.colorAttachmentCount));
        }
        if (sb.depthAttachmentCount) {
            if (!attachmentCleared_[sb.depthAttachmentIndex]) {
                attachmentCleared_[sb.depthAttachmentIndex] = true;
                HandleDepthAttachment(rpd.attachments[sb.depthAttachmentIndex]);
            }
        }
    }
    if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
        // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
        SetState(GL_RASTERIZER_DISCARD, GL_TRUE);
    }
    ClearScissorReset();
}

void RenderBackendGLES::ScanPasses(const RenderPassDesc& rpd)
{
    for (uint32_t sub = 0; sub < rpd.subpassCount; sub++) {
        const auto& currentSubPass = activeRenderPass_.subpasses[sub];
        for (uint32_t ci = 0; ci < currentSubPass.resolveAttachmentCount; ci++) {
            uint32_t resolveTo = currentSubPass.resolveAttachmentIndices[ci];
            if (attachmentFirstUse_[resolveTo] == 0xFFFFFFFF) {
                attachmentFirstUse_[resolveTo] = sub;
            }
            attachmentLastUse_[resolveTo] = sub;
            const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[resolveTo]->GetPlatformData());
            if ((p.image == 0) && (p.renderBuffer == 0)) {
                // Mark the "resolveFrom" (i.e. the color attachment) as "backbuffer-like",
                // since we resolve to the backbuffer...
                uint32_t resolveFrom = currentSubPass.colorAttachmentIndices[ci];
                resolveToBackbuffer_[resolveFrom] = true;
            }
        }
        for (uint32_t ci = 0; ci < currentSubPass.inputAttachmentCount; ci++) {
            uint32_t index = currentSubPass.inputAttachmentIndices[ci];
            if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
                attachmentFirstUse_[index] = sub;
            }
            attachmentLastUse_[index] = sub;
        }
        for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
            uint32_t index = currentSubPass.colorAttachmentIndices[ci];
            if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
                attachmentFirstUse_[index] = sub;
            }
            attachmentLastUse_[index] = sub;
            if (attachmentImage_[index]) {
                const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[index]->GetPlatformData());
                if ((p.image == 0) && (p.renderBuffer == 0)) {
                    resolveToBackbuffer_[index] = true;
                }
            }
        }
        if (currentSubPass.depthAttachmentCount > 0) {
            uint32_t index = currentSubPass.depthAttachmentIndex;
            if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
                attachmentFirstUse_[index] = sub;
            }
            attachmentLastUse_[index] = sub;
        }
    }
}
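// Worked example: with two subpasses where attachment 2 is used only as the resolve target
// of subpass 1, ScanPasses leaves attachmentFirstUse_[2] == attachmentLastUse_[2] == 1, so
// the end of subpass 1 is free to discard its contents (see InvalidateColor and
// InvalidateDepthStencil below, which test attachmentLastUse_ against currentSubPass_).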

void RenderBackendGLES::RenderCommandBeginRenderPass(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
    const auto& renderCmd = *static_cast<const struct RenderCommandBeginRenderPass*>(ref.rc);
    switch (renderCmd.beginType) {
        case RenderPassBeginType::RENDER_PASS_BEGIN: {
            ++inRenderpass_;
            PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES beginrenderpass inRenderpass_ %u", inRenderpass_);
            activeRenderPass_ = renderCmd; // Store this because we need it later (in NextRenderPass)

            const auto& rpd = activeRenderPass_.renderPassDesc;
            renderArea_ = rpd.renderArea; // can subpasses have different render areas?
            auto& cpm = *(static_cast<NodeContextPoolManagerGLES*>(managers_.poolMgr));
            if (multisampledRenderToTexture_) {
                cpm.FilterRenderPass(activeRenderPass_);
            }
            currentFrameBuffer_ = cpm.GetFramebuffer(cpm.GetFramebufferHandle(activeRenderPass_));
            if (currentFrameBuffer_ == nullptr) {
                // Completely invalid state in backend.
                commandListValid_ = false;
                --inRenderpass_;
                return;
            }
            PLUGIN_ASSERT_MSG(
                activeRenderPass_.subpassStartIndex == 0, "activeRenderPass_.subpassStartIndex != 0 not handled!");
            currentSubPass_ = 0;
            // Find the first and last use and reset the clear flags. (This could be cached in the
            // low-level classes.)
            for (uint32_t i = 0; i < rpd.attachmentCount; i++) {
                attachmentCleared_[i] = false;
                attachmentFirstUse_[i] = 0xFFFFFFFF;
                attachmentLastUse_[i] = 0;
                resolveToBackbuffer_[i] = false;
                attachmentImage_[i] =
                    static_cast<const GpuImageGLES*>(gpuResourceMgr_.GetImage(rpd.attachmentHandles[i]));
            }
            ScanPasses(rpd);
            DoSubPass(0);
#if (RENDER_PERF_ENABLED == 1)
            ++perfCounters_.renderPassCount;
#endif
        } break;

        case RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN: {
            currentSubPass_ = renderCmd.subpassStartIndex;
            PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
            DoSubPass(currentSubPass_); // use the subpass index of this command, not the stored pass
        } break;

        default:
            break;
    }
}

void RenderBackendGLES::RenderCommandNextSubpass(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::NEXT_SUBPASS);
    const auto& renderCmd = *static_cast<const struct RenderCommandNextSubpass*>(ref.rc);
    PLUGIN_UNUSED(renderCmd);
    PLUGIN_ASSERT(renderCmd.subpassContents == SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
    ++currentSubPass_;
    PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
    DoSubPass(currentSubPass_);
}

int32_t RenderBackendGLES::InvalidateDepthStencil(
    array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
{
    int32_t depthCount = 0;
    if (currentSubPass.depthAttachmentCount == 0) {
        return depthCount; // early out
    }
    const uint32_t index = currentSubPass.depthAttachmentIndex;
    if (attachmentLastUse_[index] != currentSubPass_) {
        return depthCount; // early out
    }
    // This is the last use of the attachment.
    const auto& image = attachmentImage_[index];
    const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
    // NOTE: we expect the depth to be in the FBO in this case, even if the render pass has a depth target
    if ((dplat.image || dplat.renderBuffer) && (!renderingToDefaultFbo_)) {
        bool depth = false;
        bool stencil = false;
        if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
            if ((dplat.format == GL_DEPTH_COMPONENT) || (dplat.format == GL_DEPTH_STENCIL)) {
                depth = true;
            }
        }
        if (rpd.attachments[index].stencilStoreOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
            if ((dplat.format == GL_STENCIL_INDEX) || (dplat.format == GL_DEPTH_STENCIL)) {
                stencil = true;
            }
        }
        if (depth && stencil) {
            invalidateAttachment[0] = GL_DEPTH_STENCIL_ATTACHMENT;
            depthCount++;
        } else if (stencil) {
            invalidateAttachment[0] = GL_STENCIL_ATTACHMENT;
            depthCount++;
        } else if (depth) {
            invalidateAttachment[0] = GL_DEPTH_ATTACHMENT;
            depthCount++;
        }
    }
    return depthCount;
}

int32_t RenderBackendGLES::InvalidateColor(
    array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
{
    int32_t colorCount = 0;
    // See which parts of the FBO can be invalidated by collecting the color attachment infos.
    for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
        const uint32_t index = currentSubPass.colorAttachmentIndices[ci];
        if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
            if (const auto* image = attachmentImage_[index]) {
                const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
                if (dplat.image || dplat.renderBuffer) {
                    if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
                        invalidateAttachment[static_cast<size_t>(colorCount)] = GL_COLOR_ATTACHMENT0 + ci;
                        colorCount++;
                    }
                }
            }
        }
    }
    return colorCount;
}

uint32_t RenderBackendGLES::ResolveMSAA(const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
{
    const GLbitfield mask = ((currentSubPass.resolveAttachmentCount > 0u) ? GL_COLOR_BUFFER_BIT : 0u) |
                            ((currentSubPass.depthResolveAttachmentCount > 0u) ? GL_DEPTH_BUFFER_BIT : 0u);
    if (!mask) {
        return GL_FRAMEBUFFER;
    }

    if (scissorEnabled_) {
        glDisable(GL_SCISSOR_TEST);
        scissorEnabled_ = false;
    }

    // Resolve the MSAA buffers.
    // NOTE: ARM recommends NOT using glBlitFramebuffer here.
    if (!currentSubPass.viewMask) {
        device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
        device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);

        glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
            static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
            static_cast<GLint>(currentFrameBuffer_->height), mask, GL_NEAREST);
    } else {
        // Layers need to be resolved one by one. Create temporary FBOs and go through the layers.
        GLuint frameBuffers[2U]; // 2 : size
        glGenFramebuffers(2, frameBuffers); // 2 : size
        device_.BindReadFrameBuffer(frameBuffers[0U]);
        device_.BindWriteFrameBuffer(frameBuffers[1U]);

        const auto& srcImage =
            gpuResourceMgr_.GetImage(rpd.attachmentHandles[currentSubPass.colorAttachmentIndices[0U]]);
        const auto& srcPlat = static_cast<const GpuImagePlatformDataGL&>(srcImage->GetBasePlatformData());
        const auto& dstImage =
            gpuResourceMgr_.GetImage(rpd.attachmentHandles[currentSubPass.resolveAttachmentIndices[0U]]);
        const auto& dstPlat = static_cast<const GpuImagePlatformDataGL&>(dstImage->GetBasePlatformData());

        auto viewMask = currentSubPass.viewMask;
        auto layer = 0;
        while (viewMask) {
            glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcPlat.image, 0, layer);
            glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstPlat.image, 0, layer);

            glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
                static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
                static_cast<GLint>(currentFrameBuffer_->height), mask, GL_NEAREST);
            viewMask >>= 1U;
            ++layer;
        }
        glDeleteFramebuffers(2, frameBuffers); // 2 : buffer size

        // The invalidation expects to find the actual FBOs bound.
        device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
        device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
    }
    return GL_READ_FRAMEBUFFER;
}
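// NOTE: per the GL(ES) spec, when 'mask' includes GL_DEPTH_BUFFER_BIT or GL_STENCIL_BUFFER_BIT,
// glBlitFramebuffer must use GL_NEAREST (as above) and the source and destination depth/stencil
// formats must match; resolving a multisampled read framebuffer additionally requires the source
// and destination rectangles to be identical, which is why both rectangles span the full
// framebuffer here.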

void RenderBackendGLES::RenderCommandEndRenderPass(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::END_RENDER_PASS);
    const auto& renderCmd = *static_cast<const struct RenderCommandEndRenderPass*>(ref.rc);
    if (renderCmd.endType == RenderPassEndType::END_RENDER_PASS) {
        PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES endrenderpass inRenderpass_ %u", inRenderpass_);
        inRenderpass_--;
    }
    if (currentFrameBuffer_ == nullptr) {
        // Completely invalid state in backend.
        return;
    }
    const auto& rpd = activeRenderPass_.renderPassDesc;
    const auto& currentSubPass = activeRenderPass_.subpasses[currentSubPass_];

    // Resolve MSAA.
    const uint32_t fbType = ResolveMSAA(rpd, currentSubPass);

    // Finally invalidate color and depth..
    GLenum invalidate[PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT + 1] = {};
    int32_t invalidateCount = InvalidateColor(invalidate, rpd, currentSubPass);
    invalidateCount += InvalidateDepthStencil(
        array_view(invalidate + invalidateCount, countof(invalidate) - invalidateCount), rpd, currentSubPass);

    // NOTE: all attachments should be the same size AND currentFrameBuffer_->width/height should match that!
    Invalidate(fbType, invalidateCount, invalidate, rpd, *currentFrameBuffer_);

    if (inRenderpass_ == 0) {
        currentFrameBuffer_ = nullptr;
    }
}
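// Invalidate() is defined elsewhere in this file; at its core it presumably wraps the standard
// call below (possibly restricted to the render area via glInvalidateSubFramebuffer). A minimal
// sketch using the array built above:
#if 0
if (invalidateCount > 0) {
    glInvalidateFramebuffer(fbType, static_cast<GLsizei>(invalidateCount), invalidate);
}
#endif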

void RenderBackendGLES::RenderCommandBindVertexBuffers(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_VERTEX_BUFFERS);
    const auto& renderCmd = *static_cast<const struct RenderCommandBindVertexBuffers*>(ref.rc);
    PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
    PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
    if (!boundGraphicsPipeline_) {
        return;
    }
    vertexAttribBinds_ = renderCmd.vertexBufferCount;
    for (size_t i = 0; i < renderCmd.vertexBufferCount; i++) {
        const auto& currVb = renderCmd.vertexBuffers[i];
        if (const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(currVb.bufferHandle); gpuBuffer) {
            const auto& plat = gpuBuffer->GetPlatformData();
            uintptr_t offset = currVb.bufferOffset;
            offset += plat.currentByteOffset;
            vertexAttribBindSlots_[i].id = plat.buffer;
            vertexAttribBindSlots_[i].offset = static_cast<intptr_t>(offset);
        } else {
            vertexAttribBindSlots_[i].id = 0;
            vertexAttribBindSlots_[i].offset = 0;
        }
    }
}

void RenderBackendGLES::RenderCommandBindIndexBuffer(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_INDEX_BUFFER);
    const auto& renderCmd = *static_cast<const struct RenderCommandBindIndexBuffer*>(ref.rc);
    if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.indexBuffer.bufferHandle);
        gpuBuffer) {
        const auto& plat = gpuBuffer->GetPlatformData();
        boundIndexBuffer_.offset = renderCmd.indexBuffer.bufferOffset;
        boundIndexBuffer_.offset += plat.currentByteOffset;
        boundIndexBuffer_.type = renderCmd.indexBuffer.indexType;
        boundIndexBuffer_.id = plat.buffer;
    }
}

void RenderBackendGLES::RenderCommandBlitImage(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BLIT_IMAGE);
    const auto& renderCmd = *static_cast<const struct RenderCommandBlitImage*>(ref.rc);
    const auto* srcImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
    const auto* dstImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
    if ((srcImage == nullptr) || (dstImage == nullptr)) {
        return;
    }
    const auto& srcDesc = srcImage->GetDesc();
    const auto& srcPlat = srcImage->GetPlatformData();
    const auto& dstDesc = dstImage->GetDesc();
    const auto& dstPlat = dstImage->GetPlatformData();
    const auto& srcRect = renderCmd.imageBlit.srcOffsets;
    const auto& dstRect = renderCmd.imageBlit.dstOffsets;
    const auto& src = renderCmd.imageBlit.srcSubresource;
    const auto& dst = renderCmd.imageBlit.dstSubresource;
    const auto srcMipLevel = static_cast<GLint>(src.mipLevel);
    const auto dstMipLevel = static_cast<GLint>(dst.mipLevel);
    const auto srcSampleCount = static_cast<uint32_t>(srcDesc.sampleCountFlags);
    const auto dstSampleCount = static_cast<uint32_t>(dstDesc.sampleCountFlags);
    PLUGIN_ASSERT_MSG(src.layerCount == dst.layerCount, "Source and destination layer counts do not match!");
    PLUGIN_ASSERT_MSG(inRenderpass_ == 0, "RenderCommandBlitImage while inRenderPass");
    glDisable(GL_SCISSOR_TEST);
    scissorEnabled_ = false;
    // NOTE: LAYERS! (texture arrays)
    device_.BindReadFrameBuffer(blitImageSourceFbo_);
    device_.BindWriteFrameBuffer(blitImageDestinationFbo_);
    for (uint32_t layer = 0; layer < src.layerCount; layer++) {
        const GLenum srcType = GetTarget(srcPlat.type, layer, srcSampleCount);
        const GLenum dstType = GetTarget(dstPlat.type, layer, dstSampleCount);
        // glFramebufferTextureLayer would be needed for array textures....
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, srcPlat.image, srcMipLevel);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, dstPlat.image, dstMipLevel);
        DoBlit(renderCmd.filter, { src.mipLevel, srcRect[0], srcRect[1], srcDesc.height },
            { dst.mipLevel, dstRect[0], dstRect[1], dstDesc.height });
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, 0, 0);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, 0, 0);
    }
}

void RenderBackendGLES::RenderCommandCopyBuffer(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER);
    const auto& renderCmd = *static_cast<const struct RenderCommandCopyBuffer*>(ref.rc);
    const auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
    const auto* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.dstHandle);
    if (srcGpuBuffer && dstGpuBuffer) {
        const auto& srcData = srcGpuBuffer->GetPlatformData();
        const auto& dstData = dstGpuBuffer->GetPlatformData();
        const auto oldBindR = device_.BoundBuffer(GL_COPY_READ_BUFFER);
        const auto oldBindW = device_.BoundBuffer(GL_COPY_WRITE_BUFFER);
        device_.BindBuffer(GL_COPY_READ_BUFFER, srcData.buffer);
        device_.BindBuffer(GL_COPY_WRITE_BUFFER, dstData.buffer);
        glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
            static_cast<GLintptr>(renderCmd.bufferCopy.srcOffset),
            static_cast<GLintptr>(renderCmd.bufferCopy.dstOffset), static_cast<GLsizeiptr>(renderCmd.bufferCopy.size));
        device_.BindBuffer(GL_COPY_READ_BUFFER, oldBindR);
        device_.BindBuffer(GL_COPY_WRITE_BUFFER, oldBindW);
    }
}

void RenderBackendGLES::BufferToImageCopy(const struct RenderCommandCopyBufferImage& renderCmd)
{
#if (RENDER_HAS_GLES_BACKEND == 1) && defined(_WIN32)
    // Use the workaround only for the GLES backend on Windows. (PVR simulator bug.)
    constexpr const bool usePixelUnpackBuffer = false;
#else
    // Expect this to work and the NVIDIA bug to be fixed.
    constexpr const bool usePixelUnpackBuffer = true;
#endif
    auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
    auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
    if ((srcGpuBuffer == nullptr) || (dstGpuImage == nullptr)) {
        return;
    }
    const auto info = SetupBlit<usePixelUnpackBuffer>(device_, renderCmd.bufferImageCopy, *srcGpuBuffer, *dstGpuImage);
    if (info.iPlat.type == GL_TEXTURE_CUBE_MAP) {
        BlitCube(device_, info);
    } else if (info.iPlat.type == GL_TEXTURE_2D) {
        Blit2D(device_, info);
    } else if (info.iPlat.type == GL_TEXTURE_2D_ARRAY) {
        BlitArray(device_, info);
    } else if (info.iPlat.type == GL_TEXTURE_3D) {
        Blit3D(device_, info);
#if RENDER_HAS_GLES_BACKEND
    } else if (info.iPlat.type == GL_TEXTURE_EXTERNAL_OES) {
        PLUGIN_LOG_E("Tried to copy to GL_TEXTURE_EXTERNAL_OES. Ignored!");
#endif
    } else {
        PLUGIN_ASSERT_MSG(false, "RenderCommandCopyBufferImage unhandled type");
    }
    FinishBlit<usePixelUnpackBuffer>(device_, *srcGpuBuffer);
}

void RenderBackendGLES::ImageToBufferCopy(const struct RenderCommandCopyBufferImage& renderCmd)
{
    const auto& bc = renderCmd.bufferImageCopy;
    const auto* srcGpuImage = static_cast<GpuImageGLES*>(gpuResourceMgr_.GetImage(renderCmd.srcHandle));
    const auto* dstGpuBuffer = static_cast<GpuBufferGLES*>(gpuResourceMgr_.GetBuffer(renderCmd.dstHandle));
    PLUGIN_ASSERT(srcGpuImage);
    PLUGIN_ASSERT(dstGpuBuffer);
    if ((srcGpuImage == nullptr) || (dstGpuBuffer == nullptr)) {
        return;
    }
    const auto& iPlat = static_cast<const GpuImagePlatformDataGL&>(srcGpuImage->GetPlatformData());
    const auto& bPlat = static_cast<const GpuBufferPlatformDataGL&>(dstGpuBuffer->GetPlatformData());
    if ((iPlat.type != GL_TEXTURE_CUBE_MAP) && (iPlat.type != GL_TEXTURE_2D)) {
        PLUGIN_LOG_E("Unsupported texture type in ImageToBufferCopy %x", iPlat.type);
        return;
    }
    device_.BindReadFrameBuffer(blitImageSourceFbo_);
    PLUGIN_ASSERT(bc.imageSubresource.layerCount == 1);
    GLenum type = GL_TEXTURE_2D;
    if (iPlat.type == GL_TEXTURE_CUBE_MAP) {
        type = GetCubeMapTarget(iPlat.type, bc.imageSubresource.baseArrayLayer);
    }
    // glFramebufferTextureLayer would be needed for array textures....
    glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, static_cast<GLuint>(iPlat.image),
        static_cast<GLint>(bc.imageSubresource.mipLevel));
    const Math::UVec2 sPos { bc.imageOffset.width, bc.imageOffset.height };
    const Math::UVec2 sExt { bc.imageExtent.width, bc.imageExtent.height };
    device_.BindBuffer(GL_PIXEL_PACK_BUFFER, bPlat.buffer);
    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
    glPixelStorei(GL_PACK_ALIGNMENT, 1);
    uintptr_t dstOffset = bc.bufferOffset + bPlat.currentByteOffset;
    glReadnPixels(static_cast<GLint>(sPos.x), static_cast<GLint>(sPos.y), static_cast<GLsizei>(sExt.x),
        static_cast<GLsizei>(sExt.y), iPlat.format, static_cast<GLenum>(iPlat.dataType),
        static_cast<GLsizei>(bPlat.alignedByteSize), reinterpret_cast<void*>(dstOffset));
    device_.BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
    glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, 0, 0);
}
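// NOTE: because a GL_PIXEL_PACK_BUFFER is bound during the glReadnPixels call above, its final
// 'data' argument is interpreted as a byte offset into that buffer object rather than as a
// client memory pointer (hence the reinterpret_cast of dstOffset).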

void RenderBackendGLES::RenderCommandCopyBufferImage(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER_IMAGE);
    const auto& renderCmd = *static_cast<const struct RenderCommandCopyBufferImage*>(ref.rc);
    PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during a render pass..
    if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
        BufferToImageCopy(renderCmd);
    } else if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
        ImageToBufferCopy(renderCmd);
    }
}

void RenderBackendGLES::RenderCommandCopyImage(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_IMAGE);
    const auto& renderCmd = *static_cast<const struct RenderCommandCopyImage*>(ref.rc);
    PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during a render pass..
    const auto* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
    const auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
    if ((srcGpuImage == nullptr) || (dstGpuImage == nullptr)) {
        return;
    }
    const auto& srcDesc = srcGpuImage->GetDesc();
    const auto& dstDesc = dstGpuImage->GetDesc();
#if RENDER_VALIDATION_ENABLED
    ValidateCopyImage(renderCmd.imageCopy, srcDesc, dstDesc);
#endif
    const auto srcMipLevel =
        static_cast<GLint>(Math::min(renderCmd.imageCopy.srcSubresource.mipLevel, srcDesc.mipCount - 1));
    const auto dstMipLevel =
        static_cast<GLint>(Math::min(renderCmd.imageCopy.dstSubresource.mipLevel, dstDesc.mipCount - 1));

    auto sOffset = renderCmd.imageCopy.srcOffset;
    auto dOffset = renderCmd.imageCopy.dstOffset;
    auto size = renderCmd.imageCopy.extent;

    // Clamp negative offsets to zero and adjust the extent and the other offset accordingly.
    ClampOffset(sOffset, dOffset, size);
    ClampOffset(dOffset, sOffset, size);

    // Clamp the size to fit both src and dst.
    ClampSize(sOffset, srcDesc, size);
    ClampSize(dOffset, dstDesc, size);

    const auto& srcPlatData = srcGpuImage->GetPlatformData();
    const auto& dstPlatData = dstGpuImage->GetPlatformData();
    glCopyImageSubData(srcPlatData.image, srcPlatData.type, srcMipLevel, sOffset.x, sOffset.y, sOffset.z,
        dstPlatData.image, dstPlatData.type, dstMipLevel, dOffset.x, dOffset.y, dOffset.z,
        static_cast<GLsizei>(size.width), static_cast<GLsizei>(size.height), static_cast<GLsizei>(size.depth));
}

void RenderBackendGLES::RenderCommandBarrierPoint(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BARRIER_POINT);
    const auto& renderCmd = *static_cast<const struct RenderCommandBarrierPoint*>(ref.rc);
    const auto& rbList = *managers_.rbList;
    // NOTE: proper flagging of barriers.
    const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
        rbList.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
    if (!barrierPointBarriers) {
        return; // early out
    }
    const uint32_t barrierListCount = barrierPointBarriers->barrierListCount;
    const auto* nextBarrierList = barrierPointBarriers->firstBarrierList;
    GLbitfield barriers = 0;
    GLbitfield barriersByRegion = 0;
    for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
        if (nextBarrierList == nullptr) {
            // cannot be null, just a safety
            PLUGIN_ASSERT(false);
            return;
        }
        const auto& barrierListRef = *nextBarrierList;
        nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
        const uint32_t barrierCount = barrierListRef.count;

        for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
            const auto& barrier = barrierListRef.commandBarriers[barrierIdx];

            // Check if written by a previous shader as an attachment or a storage image/buffer.
            if (barrier.src.accessFlags & (CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                                              CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
                const auto resourceHandle = barrier.resourceHandle;
                const auto handleType = RenderHandleUtil::GetHandleType(resourceHandle);

                // A barrier by region is between fragment shaders and supports only a subset of barriers.
                if ((barrier.src.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) &&
                    (barrier.dst.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
                    barriersByRegion |= CommonBarrierBits(barrier.dst.accessFlags, handleType);
                } else {
                    // Check the barriers shared with ByRegion.
                    barriers |= CommonBarrierBits(barrier.dst.accessFlags, handleType);

                    // The rest are invalid for ByRegion.
                    if (barrier.dst.accessFlags & CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) {
                        barriers |= GL_COMMAND_BARRIER_BIT;
                    }
                    if (barrier.dst.accessFlags & CORE_ACCESS_INDEX_READ_BIT) {
                        barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
                    }
                    if (barrier.dst.accessFlags & CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
                        barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
                    }
                    // Which access flags map where?
                    // GL_PIXEL_BUFFER_BARRIER_BIT:
                    // - buffer objects via the GL_PIXEL_PACK_BUFFER and GL_PIXEL_UNPACK_BUFFER bindings
                    //   (via glReadPixels, glTexSubImage2D, etc.)
                    // GL_TEXTURE_UPDATE_BARRIER_BIT:
                    // - texture updates via glTex(Sub)Image*, glCopyTex(Sub)Image*, glCompressedTex(Sub)Image*,
                    //   and reads via glGetTexImage
                    // GL_BUFFER_UPDATE_BARRIER_BIT:
                    // - glBufferSubData, glCopyBufferSubData, glGetBufferSubData, or buffer object memory
                    //   mapped by glMapBuffer or glMapBufferRange
                    // Do these two also cover all of CORE_ACCESS_MEMORY_READ_BIT / CORE_ACCESS_MEMORY_WRITE_BIT?
                    if (barrier.dst.accessFlags & (CORE_ACCESS_TRANSFER_READ_BIT | CORE_ACCESS_TRANSFER_WRITE_BIT |
                                                      CORE_ACCESS_HOST_READ_BIT | CORE_ACCESS_HOST_WRITE_BIT)) {
                        if (handleType == RenderHandleType::GPU_IMAGE) {
                            barriers |= GL_TEXTURE_UPDATE_BARRIER_BIT;
                        } else if (handleType == RenderHandleType::GPU_BUFFER) {
                            barriers |= GL_BUFFER_UPDATE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT;
                        }
                    }
                    // GL_TRANSFORM_FEEDBACK_BARRIER_BIT is not used at the moment.
                }
            }
        }
    }
    if (barriers) {
        glMemoryBarrier(barriers);
    }
    if (barriersByRegion) {
        // only for fragment-to-fragment dependencies
        glMemoryBarrierByRegion(barriersByRegion);
    }
}
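// A minimal sketch of what a CommonBarrierBits-style mapping (the real helper is defined
// elsewhere in this file) could look like; the exact flag set chosen by the real helper may
// differ, so treat this as an assumption rather than the implementation:
#if 0
GLbitfield CommonBarrierBitsSketch(AccessFlags dstAccess, RenderHandleType handleType)
{
    GLbitfield bits = 0;
    if (dstAccess & CORE_ACCESS_UNIFORM_READ_BIT) {
        bits |= GL_UNIFORM_BARRIER_BIT;
    }
    if (dstAccess & (CORE_ACCESS_SHADER_READ_BIT | CORE_ACCESS_SHADER_WRITE_BIT)) {
        bits |= (handleType == RenderHandleType::GPU_IMAGE) ? GL_SHADER_IMAGE_ACCESS_BARRIER_BIT
                                                            : GL_SHADER_STORAGE_BARRIER_BIT;
    }
    if (dstAccess & (CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
        bits |= GL_FRAMEBUFFER_BARRIER_BIT;
    }
    return bits;
}
#endif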

Gles::Bind& RenderBackendGLES::SetupBind(const DescriptorSetLayoutBinding& binding, vector<Gles::Bind>& resources)
{
    PLUGIN_ASSERT(binding.binding < resources.size());
    auto& obj = resources[binding.binding];
    PLUGIN_ASSERT(obj.resources.size() == binding.descriptorCount);
    PLUGIN_ASSERT(obj.descriptorType == binding.descriptorType);
    return obj;
}

void RenderBackendGLES::BindSampler(const BindableSampler& res, Gles::Bind& obj, uint32_t index)
{
    const auto* gpuSampler = gpuResourceMgr_.GetSampler<GpuSamplerGLES>(res.handle);
    if (gpuSampler) {
        const auto& plat = gpuSampler->GetPlatformData();
        obj.resources[index].sampler.samplerId = plat.sampler;
    } else {
        obj.resources[index].sampler.samplerId = 0;
    }
}

void RenderBackendGLES::BindImage(
    const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
{
    const AccessFlags accessFlags = resState.accessFlags;
    auto* gpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(res.handle);
    auto& ref = obj.resources[index];
    ref.image.image = gpuImage;
    const bool read = IS_BIT(accessFlags, CORE_ACCESS_SHADER_READ_BIT);
    const bool write = IS_BIT(accessFlags, CORE_ACCESS_SHADER_WRITE_BIT);
    if (read && write) {
        ref.image.mode = GL_READ_WRITE;
    } else if (read) {
        ref.image.mode = GL_READ_ONLY;
    } else if (write) {
        ref.image.mode = GL_WRITE_ONLY;
    } else {
        // neither read nor write? default to read-write.
        ref.image.mode = GL_READ_WRITE;
    }
    ref.image.mipLevel = res.mip;
}

void RenderBackendGLES::BindImageSampler(
    const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
{
    BindImage(res, resState, obj, index);
    BindSampler(BindableSampler { res.samplerHandle }, obj, index);
}

void RenderBackendGLES::BindBuffer(const BindableBuffer& res, Gles::Bind& obj, uint32_t dynamicOffset, uint32_t index)
{
    const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(res.handle);
    if (gpuBuffer) {
        const auto& plat = gpuBuffer->GetPlatformData();
        const uint32_t baseOffset = res.byteOffset;
        obj.resources[index].buffer.offset = baseOffset + plat.currentByteOffset + dynamicOffset;
        obj.resources[index].buffer.size = std::min(plat.bindMemoryByteSize - baseOffset, res.byteSize);
        obj.resources[index].buffer.bufferId = plat.buffer;
    } else {
        obj.resources[index].buffer.offset = 0;
        obj.resources[index].buffer.size = 0;
        obj.resources[index].buffer.bufferId = 0;
    }
}

void RenderBackendGLES::ProcessBindings(const struct RenderCommandBindDescriptorSets& renderCmd,
    const DescriptorSetLayoutBindingResourcesHandler& data, uint32_t set)
{
    BindState& bind = boundObjects_[set];
    vector<Gles::Bind>& resources = bind.resources;
#if RENDER_HAS_GLES_BACKEND
    bind.oesBinds.clear();
#endif
    const auto& dynamicOffsets = renderCmd.descriptorSetDynamicOffsets[set];
    const auto& buffers = data.buffers;
    const auto& images = data.images;
    const auto& samplers = data.samplers;
    uint32_t currDynamic = 0U;
    for (const auto& res : data.bindings) {
        if (res.binding.binding >= resources.size()) {
            continue;
        }
        auto& obj = SetupBind(res.binding, resources);
#if RENDER_HAS_GLES_BACKEND
        bool hasOes = false;
#endif
        const bool hasArrOffset = (res.binding.descriptorCount > 1);
        const uint32_t arrayOffset = hasArrOffset ? GetArrayOffset(data, res) : 0;
        for (uint32_t index = 0; index < res.binding.descriptorCount; index++) {
            const uint32_t resIdx = (index == 0) ? res.resourceIndex : (arrayOffset + index - 1);
            [[maybe_unused]] GpuImageGLES* image = nullptr;
            switch (res.binding.descriptorType) {
                case CORE_DESCRIPTOR_TYPE_SAMPLER: {
                    const auto& bRes = samplers[resIdx].desc;
                    BindSampler(bRes.resource, obj, index);
                    break;
                }
                case CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
                case CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE:
                case CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
                    const auto& bRes = images[resIdx].desc;
                    BindImage(bRes.resource, bRes.state, obj, index);
                    image = obj.resources[index].image.image;
                    break;
                }
                case CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
                    const auto& bRes = images[resIdx].desc;
                    BindImageSampler(bRes.resource, bRes.state, obj, index);
                    image = obj.resources[index].image.image;
                    break;
                }
                case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
                case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
                    const auto& bRes = buffers[resIdx].desc;
                    uint32_t dynamicOffset = 0;
                    if (currDynamic < dynamicOffsets.dynamicOffsetCount) {
                        dynamicOffset = dynamicOffsets.dynamicOffsets[currDynamic];
                        currDynamic++;
                    }
                    BindBuffer(bRes.resource, obj, dynamicOffset, index);
                    break;
                }
                case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
                case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
                    const auto& bRes = buffers[resIdx].desc;
                    BindBuffer(bRes.resource, obj, 0, index);
                    break;
                }
                case CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
                case CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
                case CORE_DESCRIPTOR_TYPE_MAX_ENUM:
                default:
                    PLUGIN_ASSERT_MSG(false, "Unhandled descriptor type");
                    break;
            }
#if RENDER_HAS_GLES_BACKEND
            if ((image) && (image->GetPlatformData().type == GL_TEXTURE_EXTERNAL_OES)) {
                hasOes = true;
            }
#endif
        }
#if RENDER_HAS_GLES_BACKEND
        if (hasOes) {
            bind.oesBinds.push_back(
                OES_Bind { static_cast<uint8_t>(set), static_cast<uint8_t>(res.binding.binding) });
        }
#endif
    }
}

void RenderBackendGLES::RenderCommandBindDescriptorSets(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_DESCRIPTOR_SETS);
    if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
        return;
    }
    const auto& renderCmd = *static_cast<const struct RenderCommandBindDescriptorSets*>(ref.rc);
    PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");

    const auto& aNcdsm = *managers_.descriptorSetMgr;
    for (uint32_t idx = renderCmd.firstSet; idx < renderCmd.firstSet + renderCmd.setCount; ++idx) {
        PLUGIN_ASSERT_MSG(idx < Gles::ResourceLimits::MAX_SETS, "Invalid descriptorset index");
        const auto descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
        PLUGIN_ASSERT(RenderHandleUtil::IsValid(descriptorSetHandle));
        const auto& data = aNcdsm.GetCpuDescriptorSetData(descriptorSetHandle);
        boundObjects_[idx].dirty = true; // mark the set as "changed"
        ProcessBindings(renderCmd, data, idx);
        // (Note: nothing actually gets bound yet; just the bind cache is updated.)
    }
}
2215 
SetPushConstant(uint32_t program,const Gles::PushConstantReflection & pc,const void * data)2216 void RenderBackendGLES::SetPushConstant(uint32_t program, const Gles::PushConstantReflection& pc, const void* data)
2217 {
2218     const auto location = static_cast<GLint>(pc.location);
2219     // the consts list has been filtered and cleared of unused uniforms.
2220     PLUGIN_ASSERT(location != Gles::INVALID_LOCATION);
2221     GLint count = Math::max(static_cast<GLint>(pc.arraySize), 1);
2222     switch (pc.type) {
2223         case GL_UNSIGNED_INT: {
2224             glProgramUniform1uiv(program, location, count, static_cast<const GLuint*>(data));
2225             break;
2226         }
2227         case GL_FLOAT: {
2228             glProgramUniform1fv(program, location, count, static_cast<const GLfloat*>(data));
2229             break;
2230         }
2231         case GL_FLOAT_VEC2: {
2232             glProgramUniform2fv(program, location, count, static_cast<const GLfloat*>(data));
2233             break;
2234         }
2235         case GL_FLOAT_VEC4: {
2236             glProgramUniform4fv(program, location, count, static_cast<const GLfloat*>(data));
2237             break;
2238         }
2239         case GL_FLOAT_MAT4: {
2240             glProgramUniformMatrix4fv(program, location, count, GL_FALSE, static_cast<const GLfloat*>(data));
2241             break;
2242         }
2243         case GL_UNSIGNED_INT_VEC4: {
2244             glProgramUniform4uiv(program, location, count, static_cast<const GLuint*>(data));
2245             break;
2246         }
2247         default:
2248             PLUGIN_ASSERT_MSG(false, "Unhandled pushconstant variable type");
2249     }
2250 }
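
// Example (hypothetical values): a push constant emulated as the GLSL uniform
//   layout(location = 3) uniform vec4 uColor;
// would reflect as { type = GL_FLOAT_VEC4, location = 3, arraySize = 0 } and be
// uploaded above via glProgramUniform4fv(program, 3, /* count */ 1, data).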
2251 
2252 void RenderBackendGLES::SetPushConstants(uint32_t program, const array_view<Gles::PushConstantReflection>& consts)
2253 {
2254     if (boundProgram_.setPushConstants) {
2255         boundProgram_.setPushConstants = false;
2256         const auto& renderCmd = boundProgram_.pushConstants;
2257         PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2258         PLUGIN_ASSERT_MSG(renderCmd.pushConstant.byteSize > 0, "PushConstant byteSize is zero!");
2259         PLUGIN_ASSERT_MSG(renderCmd.data, "PushConstant data is nullptr!");
2260         if ((renderCmd.data == nullptr) || (renderCmd.pushConstant.byteSize == 0))
2261             return;
2262         // ASSERT: expecting data is valid
2263         // NOTE: handle rest of the types
2264         for (const auto& pc : consts) {
2265             const size_t offs = pc.offset;
2266             if ((offs + pc.size) > renderCmd.pushConstant.byteSize) {
2267                 PLUGIN_LOG_E(
2268                     "pushConstant data invalid (data for %s is missing [offset:%zu size:%zu] byteSize of data:%u)",
2269                     pc.name.c_str(), pc.offset, pc.size, renderCmd.pushConstant.byteSize);
2270                 continue;
2271             }
2272             /*
2273             NOTE: handle the strides....
2274             consts[i].array_stride;
2275             consts[i].matrix_stride; */
2276             SetPushConstant(program, pc, &renderCmd.data[offs]);
2277         }
2278     }
2279 }
2280 
2281 void RenderBackendGLES::RenderCommandPushConstant(const RenderCommandWithType& ref)
2282 {
2283     PLUGIN_ASSERT(ref.type == RenderCommandType::PUSH_CONSTANT);
2284     if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2285         return;
2286     }
2287     const auto& renderCmd = *static_cast<const struct RenderCommandPushConstant*>(ref.rc);
2288     if (renderCmd.pushConstant.byteSize > 0) {
2289         PLUGIN_ASSERT(renderCmd.data);
2290         PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2291         boundProgram_.setPushConstants = true;
2292         boundProgram_.pushConstants = renderCmd;
2293     }
2294 }
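
// NOTE: the push constant data is only staged here; SetPushConstants() consumes
// boundProgram_.pushConstants once BindPipeline() has resolved the active program,
// so the upload targets the correct (possibly OES-specialized) program object.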
2295 
2296 void RenderBackendGLES::RenderCommandClearColorImage(const RenderCommandWithType& ref)
2297 {
2298     PLUGIN_ASSERT(ref.type == RenderCommandType::CLEAR_COLOR_IMAGE);
2299 #if RENDER_HAS_GLES_BACKEND
2300 #if (RENDER_VALIDATION_ENABLED == 1)
2301     PLUGIN_LOG_ONCE_E("RenderBackendGLES::RenderCommandClearColorImage",
2302         "Render command clear color image is not supported with GLES. One should implement a higher level path "
2303         "for clearing.");
2304 #endif
2305 #else
2306     const auto& renderCmd = *static_cast<const struct RenderCommandClearColorImage*>(ref.rc);
2307 
2308     const GpuImageGLES* imagePtr = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.handle);
2309     if (imagePtr) {
2310         const GpuImagePlatformDataGL& platImage = imagePtr->GetPlatformData();
2311         // NOTE: mip levels and array layers should be handled separately
2312         for (const auto& subresRef : renderCmd.ranges) {
2313             glClearTexImage(platImage.image,     // texture
2314                 (int32_t)subresRef.baseMipLevel, // level
2315                 platImage.format,                // format
2316                 platImage.dataType,              // type
2317                 &renderCmd.color);               // data
2318         }
2319     }
2320 #endif
2321 }
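
// NOTE: glClearTexImage is desktop GL 4.4 / ARB_clear_texture and has no core GLES
// counterpart, hence the validation-only path in the GLES build above.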
2322 
2323 // dynamic states
2324 void RenderBackendGLES::RenderCommandDynamicStateViewport(const RenderCommandWithType& ref)
2325 {
2326     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_VIEWPORT);
2327     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateViewport*>(ref.rc);
2328     const ViewportDesc& vd = renderCmd.viewportDesc;
2329     SetViewport(renderArea_, vd);
2330 }
2331 
2332 void RenderBackendGLES::RenderCommandDynamicStateScissor(const RenderCommandWithType& ref)
2333 {
2334     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_SCISSOR);
2335     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateScissor*>(ref.rc);
2336     const ScissorDesc& sd = renderCmd.scissorDesc;
2337     SetScissor(renderArea_, sd);
2338 }
2339 
2340 void RenderBackendGLES::RenderCommandDynamicStateLineWidth(const RenderCommandWithType& ref)
2341 {
2342     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_LINE_WIDTH);
2343     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateLineWidth*>(ref.rc);
2344     if (renderCmd.lineWidth != cacheState_.rasterizationState.lineWidth) {
2345         cacheState_.rasterizationState.lineWidth = renderCmd.lineWidth;
2346         glLineWidth(renderCmd.lineWidth);
2347     }
2348 }
2349 
2350 void RenderBackendGLES::RenderCommandDynamicStateDepthBias(const RenderCommandWithType& ref)
2351 {
2352     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS);
2353     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBias not implemented");
2354 }
2355 
2356 void RenderBackendGLES::RenderCommandDynamicStateBlendConstants(const RenderCommandWithType& ref)
2357 {
2358     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS);
2359     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateBlendConstants not implemented");
2360 }
2361 
2362 void RenderBackendGLES::RenderCommandDynamicStateDepthBounds(const RenderCommandWithType& ref)
2363 {
2364     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS);
2365     PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBounds not implemented");
2366 }
2367 
2368 void RenderBackendGLES::SetStencilState(const uint32_t frontFlags, const GraphicsState::StencilOpState& front,
2369     const uint32_t backFlags, const GraphicsState::StencilOpState& back)
2370 {
2371     auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2372     auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2373     const uint32_t FUNCMASK =
2374         (StencilSetFlags::SETCOMPAREOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETREFERENCE);
2375     if (frontFlags & StencilSetFlags::SETWRITEMASK) {
2376         cFront.writeMask = front.writeMask;
2377         glStencilMaskSeparate(GL_FRONT, cFront.writeMask);
2378     }
2379     if (frontFlags & FUNCMASK) {
2380         SetStencilCompareOp(cFront, front);
2381         glStencilFuncSeparate(
2382             GL_FRONT, GetCompareOp(cFront.compareOp), static_cast<GLint>(cFront.reference), cFront.compareMask);
2383     }
2384     if (frontFlags & StencilSetFlags::SETOP) {
2385         SetStencilOp(cFront, front);
2386         glStencilOpSeparate(
2387             GL_FRONT, GetStencilOp(cFront.failOp), GetStencilOp(cFront.depthFailOp), GetStencilOp(cFront.passOp));
2388     }
2389     if (backFlags & StencilSetFlags::SETWRITEMASK) {
2390         cBack.writeMask = back.writeMask;
2391         glStencilMaskSeparate(GL_BACK, cBack.writeMask);
2392     }
2393     if (backFlags & FUNCMASK) {
2394         SetStencilCompareOp(cBack, back);
2395         glStencilFuncSeparate(
2396             GL_BACK, GetCompareOp(cBack.compareOp), static_cast<GLint>(cBack.reference), cBack.compareMask);
2397     }
2398     if (backFlags & StencilSetFlags::SETOP) {
2399         SetStencilOp(cBack, back);
2400         glStencilOpSeparate(
2401             GL_BACK, GetStencilOp(cBack.failOp), GetStencilOp(cBack.depthFailOp), GetStencilOp(cBack.passOp));
2402     }
2403 }
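
// NOTE: GL packs compare op, reference and compare mask into a single
// glStencilFuncSeparate call, so changing any one of them (any bit of FUNCMASK)
// re-issues the full call with the remaining values taken from the cached state.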
2404 
2405 void RenderBackendGLES::RenderCommandDynamicStateStencil(const RenderCommandWithType& ref)
2406 {
2407     PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_STENCIL);
2408     const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateStencil*>(ref.rc);
2409     auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2410     auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2411     uint32_t setFront = 0;
2412     uint32_t setBack = 0;
2413     if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_FRONT_BIT) {
2414         if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2415             if (renderCmd.mask != cFront.compareMask) {
2416                 cFront.compareMask = renderCmd.mask;
2417                 setFront |= StencilSetFlags::SETCOMPAREMASK;
2418             }
2419         } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2420             if (renderCmd.mask != cFront.writeMask) {
2421                 cFront.writeMask = renderCmd.mask;
2422                 setFront |= StencilSetFlags::SETWRITEMASK;
2423             }
2424         } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2425             if (renderCmd.mask != cFront.reference) {
2426                 cFront.reference = renderCmd.mask;
2427                 setFront |= StencilSetFlags::SETREFERENCE;
2428             }
2429         }
2430     }
2431     if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_BACK_BIT) {
2432         if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2433             if (renderCmd.mask != cBack.compareMask) {
2434                 cBack.compareMask = renderCmd.mask;
2435                 setBack |= StencilSetFlags::SETCOMPAREMASK;
2436             }
2437         } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2438             if (renderCmd.mask != cBack.writeMask) {
2439                 cBack.writeMask = renderCmd.mask;
2440                 setBack |= StencilSetFlags::SETWRITEMASK;
2441             }
2442         } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2443             if (renderCmd.mask != cBack.reference) {
2444                 cBack.reference = renderCmd.mask;
2445                 setBack |= StencilSetFlags::SETREFERENCE;
2446             }
2447         }
2448     }
2449     SetStencilState(setFront, cFront, setBack, cBack);
2450 }
2451 
2452 void RenderBackendGLES::RenderCommandFragmentShadingRate(const RenderCommandWithType& renderCmd)
2453 {
2454 #if (RENDER_VALIDATION_ENABLED == 1)
2455     PLUGIN_LOG_ONCE_I("gles_RenderCommandFragmentShadingRate",
2456         "RENDER_VALIDATION: Fragment shading rate not available with GL(ES) backend.");
2457 #endif
2458 }
2459 
2460 void RenderBackendGLES::RenderCommandExecuteBackendFramePosition(const RenderCommandWithType& renderCmd)
2461 {
2462     PLUGIN_ASSERT_MSG(false, "RenderCommandExecuteBackendFramePosition not implemented");
2463 }
2464 
2465 void RenderBackendGLES::RenderCommandWriteTimestamp(const RenderCommandWithType& renderCmd)
2466 {
2467     PLUGIN_ASSERT_MSG(false, "RenderCommandWriteTimestamp not implemented");
2468 }
2469 
2470 void RenderBackendGLES::BindVertexInputs(
2471     const VertexInputDeclarationData& decldata, const array_view<const int32_t>& vertexInputs)
2472 {
2473     // update bindings for the VAO.
2474     // process the attribute descriptions to only bind the needed vertex buffers
2475     // NOTE: there are, or might be, extra bindings in decldata.bindingDescriptions,
2476     // but we only bind the ones needed for the shader
2477     const uint32_t minBinding = Math::min(vertexAttribBinds_, decldata.attributeDescriptionCount);
2478     for (uint32_t i = 0; i < minBinding; ++i) {
2479         const auto& attributeRef = decldata.attributeDescriptions[i];
2480         const uint32_t location = attributeRef.location;
2481         const uint32_t binding = attributeRef.binding;
2482         // NOTE: we need to bind all the buffers to the correct bindings.
2483         // shader optimized check (vertexInputs, some locations are not in use)
2484         if ((location != ~0u) && (binding != ~0u) && (vertexInputs[location] != Gles::INVALID_LOCATION)) {
2485             const auto& slot = vertexAttribBindSlots_[binding];
2486             const auto& bindingRef = decldata.bindingDescriptions[binding];
2487             PLUGIN_ASSERT(bindingRef.binding == binding);
2488             // buffer bound to slot, and it's used by the shader.
2489             device_.BindVertexBuffer(binding, slot.id, slot.offset, static_cast<intptr_t>(bindingRef.stride));
2490             /*
2491             core/vulkan
2492             bindingRef.vertexInputRate =  CORE_VERTEX_INPUT_RATE_VERTEX (0)  attribute index advances per vertex
2493             bindingRef.vertexInputRate =  CORE_VERTEX_INPUT_RATE_INSTANCE (1)  attribute index advances per instance
2494 
2495             gl/gles
2496             If divisor is  0, the attributes using the buffer bound to bindingindex advance once per vertex.
2497             If divisor is >0, the attributes advance once per divisor instances of the set(s) of vertices being
2498             rendered.
2499 
2500             so we can pass the inputRate directly as the VertexBindingDivisor (ie. advance once per instance);
2501             the enum values happen to match, so we can simply cast.
2502             */
2503             static_assert(CORE_VERTEX_INPUT_RATE_VERTEX == 0 && CORE_VERTEX_INPUT_RATE_INSTANCE == 1);
2504             device_.VertexBindingDivisor(binding, static_cast<uint32_t>(bindingRef.vertexInputRate));
2505         }
2506     }
2507 }
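
// Example (hypothetical values): an instanced attribute declared with
//   bindingDescriptions[1] = { binding = 1, stride = 16, vertexInputRate = CORE_VERTEX_INPUT_RATE_INSTANCE }
// results in BindVertexBuffer(1, slot.id, slot.offset, 16) followed by
// VertexBindingDivisor(1, 1), i.e. the attribute advances once per instance.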
2508 
2509 const BASE_NS::array_view<Binder>* RenderBackendGLES::BindPipeline()
2510 {
2511     const array_view<Binder>* resourceList = nullptr;
2512     const array_view<Gles::PushConstantReflection>* pushConstants = nullptr;
2513     int32_t flipLocation = Gles::INVALID_LOCATION;
2514     uint32_t program = 0;
2515     // Push constants and the "flipLocation" uniform (ie. uniform state) should only be updated if changed...
2516     if (currentFrameBuffer_) { // currentFrameBuffer_ is only set if a graphics pipeline is bound.
2517         PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
2518         PLUGIN_ASSERT(boundGraphicsPipeline_);
2519         if (!boundGraphicsPipeline_) {
2520             return resourceList;
2521         }
2522         const auto& pipelineData =
2523             static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
2524         const GpuShaderProgramGLES* shader = pipelineData.graphicsShader;
2525 #if RENDER_HAS_GLES_BACKEND
2526         if (!oesBinds_.empty()) {
2527             // okay, oesBinds_ contains the set/binding pairs to which an OES texture is bound;
2528             // ask for a compatible program from the boundGraphicsPipeline_
2529             shader = boundGraphicsPipeline_->GetOESProgram(oesBinds_);
2530         }
2531 #endif
2532         if (!shader) {
2533             return resourceList;
2534         }
2535         const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
2536         program = sd.program;
2537 
2538         FlushViewportScissors();
2539         if (!scissorEnabled_) {
2540             scissorEnabled_ = true;
2541             glEnable(GL_SCISSOR_TEST); // Always enabled
2542         }
2543 #if (RENDER_PERF_ENABLED == 1)
2544         if (device_.BoundProgram() != program) {
2545             ++perfCounters_.bindProgram;
2546         }
2547 #endif
2548         device_.UseProgram(program);
2549         device_.BindVertexArray(pipelineData.vao);
2550         BindVertexInputs(pipelineData.vertexInputDeclaration, array_view<const int32_t>(sd.inputs, countof(sd.inputs)));
2551         device_.BindElementBuffer(boundIndexBuffer_.id);
2552         resourceList = &sd.resourceList;
2553         flipLocation = sd.flipLocation;
2554         pushConstants = &sd.pushConstants;
2555     } else {
2556         PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
2557         PLUGIN_ASSERT(boundComputePipeline_);
2558         if (!boundComputePipeline_) {
2559             return resourceList;
2560         }
2561         const auto& pipelineData =
2562             static_cast<const PipelineStateObjectPlatformDataGL&>(boundComputePipeline_->GetPlatformData());
2563         if (pipelineData.computeShader) {
2564             const auto& sd =
2565                 static_cast<const GpuComputeProgramPlatformDataGL&>(pipelineData.computeShader->GetPlatformData());
2566             program = sd.program;
2567 #if (RENDER_PERF_ENABLED == 1)
2568             if (device_.BoundProgram() != program) {
2569                 ++perfCounters_.bindProgram;
2570             }
2571 #endif
2572             device_.UseProgram(program);
2573             resourceList = &sd.resourceList;
2574             flipLocation = sd.flipLocation;
2575             pushConstants = &sd.pushConstants;
2576         }
2577     }
2578 
2579     if (pushConstants) {
2580         SetPushConstants(program, *pushConstants);
2581     }
2582     if (flipLocation != Gles::INVALID_LOCATION) {
2583         const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
2584         glProgramUniform1fv(program, flipLocation, 1, &flip);
2585     }
2586     return resourceList;
2587 }
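
// NOTE: the flip uniform is -1.0 when rendering to the default FBO and 1.0 otherwise;
// shaders can use it to compensate for the bottom-left vs. top-left origin difference,
// mirroring the viewport/scissor conversion done in FlushViewportScissors().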
2588 
2589 void RenderBackendGLES::BindResources()
2590 {
2591 #if RENDER_HAS_GLES_BACKEND
2592     // scan all sets here to see if any of the sets has an OES texture bound.
2593     // we don't actually need to rebuild this info every time;
2594     // the gpu descriptor sets should be "emulated" better (and this information stored along with the other
2595     // bind cache data there)
2596     oesBinds_.clear();
2597     for (const auto& state : boundObjects_) {
2598         const auto& oes = state.oesBinds;
2599         if (!oes.empty()) {
2600             oesBinds_.append(oes.begin(), oes.end());
2601         }
2602     }
2603 #endif
2604     const auto* resourceList = BindPipeline();
2605     if (!resourceList) {
2606         return;
2607     }
2608     for (const auto& r : *resourceList) {
2609         PLUGIN_ASSERT(r.set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
2610         if (r.bind >= static_cast<uint32_t>(boundObjects_[r.set].resources.size())) {
2611             continue;
2612         }
2613         const auto& res = boundObjects_[r.set].resources[r.bind];
2614         PLUGIN_ASSERT(res.resources.size() == r.id.size());
2615         auto resType = res.descriptorType;
2616         if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
2617             resType = CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
2618         } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2619             resType = CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER;
2620         }
2621 
2622         // a few helpers for updating perf counters and binding the sampler/texture/buffer
2623         auto bindSampler = [this](uint32_t textureUnit, uint32_t samplerId) {
2624 #if (RENDER_PERF_ENABLED == 1)
2625             if (device_.BoundSampler(textureUnit) != samplerId) {
2626                 ++perfCounters_.bindSampler;
2627             }
2628 #endif
2629             device_.BindSampler(textureUnit, samplerId);
2630         };
2631         auto bindTexture = [this](uint32_t textureUnit, const GpuImagePlatformDataGL& dplat) {
2632 #if (RENDER_PERF_ENABLED == 1)
2633             if (device_.BoundTexture(textureUnit, dplat.type) != dplat.image) {
2634                 ++perfCounters_.bindTexture;
2635             }
2636 #endif
2637             device_.BindTexture(textureUnit, dplat.type, dplat.image);
2638         };
2639         auto bindTextureImage = [this](uint32_t textureUnit, const Gles::Bind::ImageType& image,
2640                                     const GpuImagePlatformDataGL& dplat) {
2641             uint32_t level = (image.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? image.mipLevel : 0U;
2642             device_.BindImageTexture(textureUnit, dplat.image, level, false, 0, image.mode, dplat.internalFormat);
2643         };
2644         auto bindBuffer = [this](uint32_t target, uint32_t binding, const Gles::Bind::BufferType& buffer) {
2645 #if (RENDER_PERF_ENABLED == 1)
2646             if (device_.BoundBuffer(target) != buffer.bufferId) {
2647                 ++perfCounters_.bindBuffer;
2648             }
2649 #endif
2650             device_.BindBufferRange(target, binding, buffer.bufferId, buffer.offset, buffer.size);
2651         };
2652         auto setMipLevel = [](const uint32_t type, const uint32_t mipLevel) {
2653             // either force the defined mip level or use defaults.
2654             glTexParameteri(type, GL_TEXTURE_BASE_LEVEL,
2655                 static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 0U));
2656             glTexParameteri(type, GL_TEXTURE_MAX_LEVEL,
2657                 static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS)
2658                     ? mipLevel : 1000U)); // 1000 : the GL default value of GL_TEXTURE_MAX_LEVEL
2659         };
2660 
2661 #if (RENDER_VALIDATION_ENABLED == 1)
2662         if (resType != r.type) {
2663             PLUGIN_LOG_ONCE_E(
2664                 "backend_desc_type_mismatch_gles", "RENDER_VALIDATION: shader / pipeline descriptor type mismatch");
2665         }
2666 #endif
2667 
2668         for (uint32_t index = 0; index < res.resources.size(); index++) {
2669             const auto& obj = res.resources[index];
2670             for (const auto& id : r.id[index]) {
2671                 const auto binding = index + id;
2672                 if (resType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
2673                     bindSampler(binding, obj.sampler.samplerId);
2674                 } else if ((resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
2675                            (resType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
2676                            (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
2677                     if (resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
2678                         bindSampler(binding, obj.sampler.samplerId);
2679                     } else if (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
2680                         bindSampler(binding, 0U);
2681                     }
2682                     if (obj.image.image) {
2683                         auto& dplat = obj.image.image->GetPlatformData();
2684                         bindTexture(binding, dplat);
2685 
2686                         // NOTE: the last setting is active; different mip levels can not be bound from a single
2687                         // resource.
2688                         // Check and update (if needed) the forced miplevel.
2689                         if (dplat.mipLevel != obj.image.mipLevel) {
2690                             // NOTE: we are actually modifying the texture object bound above
2691                             const_cast<GpuImagePlatformDataGL&>(dplat).mipLevel = obj.image.mipLevel;
2692                             setMipLevel(dplat.type, dplat.mipLevel);
2693                         }
2694                     }
2695                 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
2696                     if (obj.image.image) {
2697                         auto& dplat = obj.image.image->GetPlatformData();
2698                         bindTextureImage(binding, obj.image, dplat);
2699                     }
2700                 } else if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
2701                     bindBuffer(GL_UNIFORM_BUFFER, binding, obj.buffer);
2702                 } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
2703                     bindBuffer(GL_SHADER_STORAGE_BUFFER, binding, obj.buffer);
2704                 }
2705             }
2706         }
2707     }
2708     // mark all bound.
2709     for (auto& b : boundObjects_) {
2710         b.dirty = false;
2711     }
2712 }
2713 
2714 void RenderBackendGLES::RenderCommandBeginDebugMarker(const RenderCommandWithType& ref)
2715 {
2716 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
2717     const auto& renderCmd = *static_cast<const struct RenderCommandBeginDebugMarker*>(ref.rc);
2718     glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)renderCmd.name.data());
2719 #endif
2720 }
2721 
2722 void RenderBackendGLES::RenderCommandEndDebugMarker(const RenderCommandWithType&)
2723 {
2724 #if (RENDER_DEBUG_MARKERS_ENABLED == 1)
2725     glPopDebugGroup();
2726 #endif
2727 }
2728 
2729 #if (RENDER_PERF_ENABLED == 1)
2730 void RenderBackendGLES::StartFrameTimers(const RenderCommandFrameData& renderCommandFrameData)
2731 {
2732     framePerfCounters_ = {};
2733     for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
2734         const string_view& debugName = renderCommandContext.debugName;
2735         if (timers_.count(debugName) == 0) { // new timers
2736 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2737             PerfDataSet& perfDataSet = timers_[debugName];
2738             constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
2739             perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryGLES(device_, desc));
2740             perfDataSet.counter = 0u;
2741 #else
2742             timers_.insert({ debugName, {} });
2743 #endif
2744         }
2745     }
2746 }
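
// NOTE: timers are created lazily, one PerfDataSet per render command context debug
// name; with RENDER_GPU_TIMESTAMP_QUERIES_ENABLED each set also owns a timestamp GPU
// query created through gpuQueryMgr_.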
2747 
2748 void RenderBackendGLES::EndFrameTimers()
2749 {
2750     int64_t fullGpuTime = 0;
2751 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2752     // already in micros
2753     fullGpuTime = fullGpuCounter_;
2754     fullGpuCounter_ = 0;
2755 #endif
2756     if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2757             CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2758         globalPerfData) {
2759         CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RENDER");
2760         perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
2761         perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
2762         perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
2763         perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
2764         perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
2765         perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);
2766 
2767         CORE_PROFILER_PLOT("Full_Cpu", static_cast<int64_t>(commonCpuTimers_.full.GetMicroseconds()));
2768         CORE_PROFILER_PLOT("Acquire_Cpu", static_cast<int64_t>(commonCpuTimers_.acquire.GetMicroseconds()));
2769         CORE_PROFILER_PLOT("Execute_Cpu", static_cast<int64_t>(commonCpuTimers_.execute.GetMicroseconds()));
2770         CORE_PROFILER_PLOT("Submit_Cpu", static_cast<int64_t>(commonCpuTimers_.submit.GetMicroseconds()));
2771         CORE_PROFILER_PLOT("Present_Cpu", static_cast<int64_t>(commonCpuTimers_.present.GetMicroseconds()));
2772         CORE_PROFILER_PLOT("Full_Gpu", static_cast<int64_t>(fullGpuTime));
2773     }
2774 
2775     CORE_PROFILER_PLOT("Instance count", static_cast<int64_t>(framePerfCounters_.instanceCount));
2776     CORE_PROFILER_PLOT("Triangle count", static_cast<int64_t>(framePerfCounters_.triangleCount));
2777     CORE_PROFILER_PLOT("Draw count", static_cast<int64_t>(framePerfCounters_.drawCount));
2778     CORE_PROFILER_PLOT("Draw Indirect count", static_cast<int64_t>(framePerfCounters_.drawIndirectCount));
2779     CORE_PROFILER_PLOT("Dispatch count", static_cast<int64_t>(framePerfCounters_.dispatchCount));
2780     CORE_PROFILER_PLOT("Dispatch Indirect count", static_cast<int64_t>(framePerfCounters_.dispatchIndirectCount));
2781     CORE_PROFILER_PLOT("RenderPass count", static_cast<int64_t>(framePerfCounters_.renderPassCount));
2782     CORE_PROFILER_PLOT("Bind program count", static_cast<int64_t>(framePerfCounters_.bindProgram));
2783     CORE_PROFILER_PLOT("Bind sampler count", static_cast<int64_t>(framePerfCounters_.bindSampler));
2784     CORE_PROFILER_PLOT("Bind texture count", static_cast<int64_t>(framePerfCounters_.bindTexture));
2785     CORE_PROFILER_PLOT("Bind buffer count", static_cast<int64_t>(framePerfCounters_.bindBuffer));
2786 }
2787 
2788 void RenderBackendGLES::CopyPerfTimeStamp(const string_view name, PerfDataSet& perfDataSet)
2789 {
2790 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2791     int64_t gpuMicroSeconds = 0;
2792     if (validGpuQueries_) {
2793         GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
2794         PLUGIN_ASSERT(gpuQuery);
2795 
2796         gpuQuery->NextQueryIndex();
2797 
2798         const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
2799         PLUGIN_ASSERT(platData.queryObject);
2800 
2801         GLint disjointOccurred = 0;
2802 #ifdef GL_GPU_DISJOINT_EXT
2803         // Clear disjoint error
2804         glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
2805 #endif
2806         if (!disjointOccurred && (++perfDataSet.counter) > device_.GetCommandBufferingCount()) {
2807             GLuint64 gpuNanoSeconds = 0U;
2808 #ifdef GL_GPU_DISJOINT_EXT
2809             glGetQueryObjectui64vEXT(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2810 #else
2811             glGetQueryObjectui64v(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
2812 #endif
2813             static constexpr uint64_t NANOSECONDS_TO_MICROSECONDS = 1000; // 1000 nanoseconds per microsecond
2814             gpuMicroSeconds = static_cast<int64_t>(gpuNanoSeconds / NANOSECONDS_TO_MICROSECONDS);
2815             if (gpuMicroSeconds > UINT32_MAX) {
2816                 gpuMicroSeconds = 0;
2817             }
2818             fullGpuCounter_ += gpuMicroSeconds;
2819         } else if (disjointOccurred) {
2820             PLUGIN_LOG_V("GL_GPU_DISJOINT_EXT disjoint occurred.");
2821         }
2822     }
2823 #endif
2824     const int64_t cpuMicroSeconds = perfDataSet.cpuTimer.GetMicroseconds();
2825 
2826     if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
2827             CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
2828         globalPerfData) {
2829         CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");
2830 
2831         perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
2832 #if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
2833         perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
2834 #endif
2835         perfData->UpdateData(name, "Backend_Count_Triangle", perfCounters_.triangleCount);
2836         perfData->UpdateData(name, "Backend_Count_InstanceCount", perfCounters_.instanceCount);
2837         perfData->UpdateData(name, "Backend_Count_Draw", perfCounters_.drawCount);
2838         perfData->UpdateData(name, "Backend_Count_DrawIndirect", perfCounters_.drawIndirectCount);
2839         perfData->UpdateData(name, "Backend_Count_Dispatch", perfCounters_.dispatchCount);
2840         perfData->UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters_.dispatchIndirectCount);
2841         perfData->UpdateData(name, "Backend_Count_RenderPass", perfCounters_.renderPassCount);
2842         perfData->UpdateData(name, "Backend_Count_BindProgram", perfCounters_.bindProgram);
2843         perfData->UpdateData(name, "Backend_Count_BindSampler", perfCounters_.bindSampler);
2844         perfData->UpdateData(name, "Backend_Count_BindTexture", perfCounters_.bindTexture);
2845         perfData->UpdateData(name, "Backend_Count_BindBuffer", perfCounters_.bindBuffer);
2846         framePerfCounters_.drawCount += perfCounters_.drawCount;
2847         framePerfCounters_.drawIndirectCount += perfCounters_.drawIndirectCount;
2848         framePerfCounters_.dispatchCount += perfCounters_.dispatchCount;
2849         framePerfCounters_.dispatchIndirectCount += perfCounters_.dispatchIndirectCount;
2850         framePerfCounters_.renderPassCount += perfCounters_.renderPassCount;
2851         framePerfCounters_.bindProgram += perfCounters_.bindProgram;
2852         framePerfCounters_.bindSampler += perfCounters_.bindSampler;
2853         framePerfCounters_.bindTexture += perfCounters_.bindTexture;
2854         framePerfCounters_.bindBuffer += perfCounters_.bindBuffer;
2855         framePerfCounters_.triangleCount += perfCounters_.triangleCount;
2856         framePerfCounters_.instanceCount += perfCounters_.instanceCount;
2857     }
2858 }
2859 #endif
2860 
2861 void RenderBackendGLES::PrimeDepthStencilState(const GraphicsState& graphicsState)
2862 {
2863     auto& cDepth = cacheState_.depthStencilState;
2864     cDepth = graphicsState.depthStencilState;
2865     // CORE_DYNAMIC_STATE_DEPTH_BOUNDS NOT SUPPORTED ON GLES. (and not implemented on GL either)
2866     SetState(GL_DEPTH_TEST, cDepth.enableDepthTest);
2867     SetState(GL_STENCIL_TEST, cDepth.enableStencilTest);
2868     glDepthFunc(GetCompareOp(cDepth.depthCompareOp));
2869     glDepthMask((cDepth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE)));
2870     const uint32_t updateAllFlags =
2871         (StencilSetFlags::SETOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETCOMPAREOP |
2872             StencilSetFlags::SETREFERENCE | StencilSetFlags::SETWRITEMASK);
2873     SetStencilState(updateAllFlags, cDepth.frontStencilOpState, updateAllFlags, cDepth.backStencilOpState);
2874 }
2875 
2876 void RenderBackendGLES::PrimeBlendState(const GraphicsState& graphicsState)
2877 {
2878     auto& cBlend = cacheState_.colorBlendState;
2879     cBlend = graphicsState.colorBlendState;
2880     glBlendColor(cBlend.colorBlendConstants[Gles::RED_INDEX], cBlend.colorBlendConstants[Gles::GREEN_INDEX],
2881         cBlend.colorBlendConstants[Gles::BLUE_INDEX], cBlend.colorBlendConstants[Gles::ALPHA_INDEX]);
2882     GLuint maxColorAttachments = 0U;
2883     glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, reinterpret_cast<GLint*>(&maxColorAttachments));
2884     maxColorAttachments = BASE_NS::Math::min(PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT, maxColorAttachments);
2885     for (GLuint i = 0; i < maxColorAttachments; i++) {
2886         const auto& cBlendState = cBlend.colorAttachments[i];
2887         glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
2888             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
2889             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
2890             IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
2891         if (cBlendState.enableBlend) {
2892             glEnablei(GL_BLEND, i);
2893         } else {
2894             glDisablei(GL_BLEND, i);
2895         }
2896         glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
2897             GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
2898             GetBlendFactor(cBlendState.dstAlphaBlendFactor));
2899         glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
2900     }
2901     // logicops are unsupported on GLES
2902 }
2903 
2904 void RenderBackendGLES::PrimeCache(const GraphicsState& graphicsState) // Forces the full graphics state.
2905 {
2906     if (cachePrimed_) {
2907         return;
2908     }
2909     cachePrimed_ = true;
2910     /// GRAPHICSSTATE     inputAssembly
2911     const auto& ia = graphicsState.inputAssembly;
2912     auto& cia = cacheState_.inputAssembly;
2913     cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
2914     SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
2915     topology_ = ia.primitiveTopology;
2916     /// GRAPHICSSTATE     rasterizationState
2917     const auto& rs = graphicsState.rasterizationState;
2918     auto& crs = cacheState_.rasterizationState;
2919     // save, since we need to emulate non-fill modes etc. (possibly with shader help for lines...)
2920     polygonMode_ = rs.polygonMode;
2921     // GL_DEPTH_CLAMP (rs.enableDepthClamp) NOT SUPPORTED; CHECK GLES 3.2
2922     crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
2923     SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
2924     crs.enableDepthBias = rs.enableDepthBias;
2925     SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
2926     crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
2927     crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
2928     glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
2929     // depthBiasClamp NOT SUPPORTED! CHECK GLES 3.2
2930     // If cull mode Flags change...
2931     crs.cullModeFlags = rs.cullModeFlags;
2932     SetCullMode(crs);
2933     crs.frontFace = rs.frontFace;
2934     SetFrontFace(crs);
2935     crs.lineWidth = rs.lineWidth;
2936     glLineWidth(rs.lineWidth);
2937     PrimeDepthStencilState(graphicsState);
2938     PrimeBlendState(graphicsState);
2939 }
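
// NOTE: PrimeCache only runs while the cache is unprimed (cachePrimed_), forcing all
// GL state to match cacheState_; afterwards DoGraphicsState and the Update*State
// helpers only issue GL calls for values that differ from the cache.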
2940 
2941 void RenderBackendGLES::UpdateDepthState(const GraphicsState& graphicsState)
2942 {
2943     const auto& depth = graphicsState.depthStencilState;
2944     auto& cDepth = cacheState_.depthStencilState;
2945     if (depth.enableDepthTest != cDepth.enableDepthTest) {
2946         cDepth.enableDepthTest = depth.enableDepthTest;
2947         SetState(GL_DEPTH_TEST, depth.enableDepthTest);
2948     }
2949     if (depth.depthCompareOp != cDepth.depthCompareOp) {
2950         cDepth.depthCompareOp = depth.depthCompareOp;
2951         glDepthFunc(GetCompareOp(depth.depthCompareOp));
2952     }
2953     if (depth.enableDepthWrite != cDepth.enableDepthWrite) {
2954         cDepth.enableDepthWrite = depth.enableDepthWrite;
2955         glDepthMask(depth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE));
2956     }
2957     // CORE_DYNAMIC_STATE_DEPTH_BOUNDS not supported on GLES.
2960 }
2961 
2962 void RenderBackendGLES::UpdateStencilState(const GraphicsState& graphicsState)
2963 {
2964     const auto& depth = graphicsState.depthStencilState;
2965     auto& cDepth = cacheState_.depthStencilState;
2966     if (depth.enableStencilTest != cDepth.enableStencilTest) {
2967         cDepth.enableStencilTest = depth.enableStencilTest;
2968         SetState(GL_STENCIL_TEST, depth.enableStencilTest);
2969     }
2970     uint32_t setFront = 0;
2971     uint32_t setBack = 0;
2972     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_REFERENCE)) {
2973         if (cDepth.frontStencilOpState.reference != depth.frontStencilOpState.reference) {
2974             setFront |= StencilSetFlags::SETREFERENCE;
2975         }
2976         if (cDepth.backStencilOpState.reference != depth.backStencilOpState.reference) {
2977             setBack |= StencilSetFlags::SETREFERENCE;
2978         }
2979     }
2980     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
2981         if (cDepth.frontStencilOpState.compareMask != depth.frontStencilOpState.compareMask) {
2982             setFront |= StencilSetFlags::SETCOMPAREMASK;
2983         }
2984         if (cDepth.backStencilOpState.compareMask != depth.backStencilOpState.compareMask) {
2985             setBack |= StencilSetFlags::SETCOMPAREMASK;
2986         }
2987     }
2988     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
2989         if (cDepth.frontStencilOpState.writeMask != depth.frontStencilOpState.writeMask) {
2990             setFront |= StencilSetFlags::SETWRITEMASK;
2991         }
2992         if (cDepth.backStencilOpState.writeMask != depth.backStencilOpState.writeMask) {
2993             setBack |= StencilSetFlags::SETWRITEMASK;
2994         }
2995     }
2996     if (cDepth.frontStencilOpState.compareOp != depth.frontStencilOpState.compareOp) {
2997         setFront |= StencilSetFlags::SETCOMPAREOP;
2998     }
2999     if (cDepth.backStencilOpState.compareOp != depth.backStencilOpState.compareOp) {
3000         setBack |= StencilSetFlags::SETCOMPAREOP;
3001     }
3002     if (!CompareStencilOp(cDepth.frontStencilOpState, depth.frontStencilOpState)) {
3003         setFront |= StencilSetFlags::SETOP;
3004     }
3005     if (!CompareStencilOp(cDepth.backStencilOpState, depth.backStencilOpState)) {
3006         setBack |= StencilSetFlags::SETOP;
3007     }
3008     SetStencilState(setFront, depth.frontStencilOpState, setBack, depth.backStencilOpState);
3009 }
3010 
3011 void RenderBackendGLES::UpdateDepthStencilState(const GraphicsState& graphicsState)
3012 {
3013     UpdateDepthState(graphicsState);
3014     UpdateStencilState(graphicsState);
3015 }
3016 
3017 void RenderBackendGLES::UpdateBlendState(const GraphicsState& graphicsState)
3018 {
3019     const auto& blend = graphicsState.colorBlendState;
3020     auto& cBlend = cacheState_.colorBlendState;
3021     for (GLuint i = 0; i < blend.colorAttachmentCount; i++) {
3022         const auto& blendState = blend.colorAttachments[i];
3023         auto& cBlendState = cBlend.colorAttachments[i];
3024         if (blendState.colorWriteMask != cBlendState.colorWriteMask) {
3025             cBlendState.colorWriteMask = blendState.colorWriteMask;
3026             glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
3027                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
3028                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
3029                 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
3030         }
3031 
3032         // Check if blend state has changed
3033         bool factorsChanged = false;
3034         bool opsChanged = false;
3035 
3036         if (blendState.enableBlend) {
3037             factorsChanged = !CompareBlendFactors(cBlendState, blendState);
3038             opsChanged = !CompareBlendOps(cBlendState, blendState);
3039         }
3040 
3041         if (blendState.enableBlend == cBlendState.enableBlend && !factorsChanged && !opsChanged) {
3042             continue;
3043         }
3044         cBlendState.enableBlend = blendState.enableBlend;
3045         if (blendState.enableBlend) {
3046             glEnablei(GL_BLEND, i);
3047             if (factorsChanged) {
3048                 SetBlendFactors(cBlendState, blendState);
3049                 glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
3050                     GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
3051                     GetBlendFactor(cBlendState.dstAlphaBlendFactor));
3052             }
3053             if (opsChanged) {
3054                 SetBlendOps(cBlendState, blendState);
3055                 glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
3056             }
3057         } else {
3058             glDisablei(GL_BLEND, i);
3059         }
3060     }
3061     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_BLEND_CONSTANTS)) {
3062         if (!Compare(cBlend.colorBlendConstants, blend.colorBlendConstants)) {
3063             Set(cBlend.colorBlendConstants, blend.colorBlendConstants);
3064             glBlendColor(blend.colorBlendConstants[Gles::RED_INDEX], blend.colorBlendConstants[Gles::GREEN_INDEX],
3065                 blend.colorBlendConstants[Gles::BLUE_INDEX], blend.colorBlendConstants[Gles::ALPHA_INDEX]);
3066         }
3067     }
3068     // logicOps in blend not supported on GLES
3069 }
3070 
3071 void RenderBackendGLES::UpdateRasterizationState(const GraphicsState& graphicsState)
3072 {
3073     const auto& rs = graphicsState.rasterizationState;
3074     auto& crs = cacheState_.rasterizationState;
3075     // save, since we need to emulate non-fill modes etc. (possibly with shader help for lines...)
3076     polygonMode_ = rs.polygonMode;
3077 #if RENDER_HAS_GL_BACKEND
3078     if (rs.polygonMode != crs.polygonMode) {
3079         crs.polygonMode = rs.polygonMode;
3080         SetPolygonMode(rs);
3081     }
3082 #endif
3083     if (rs.enableDepthClamp != crs.enableDepthClamp) {
3084         crs.enableDepthClamp = rs.enableDepthClamp;
3085         // NOT SUPPORTED    (needs an extension)
3086     }
3087     if (rs.enableRasterizerDiscard != crs.enableRasterizerDiscard) {
3088         crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
3089         SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
3090     }
3091     if (rs.enableDepthBias != crs.enableDepthBias) {
3092         crs.enableDepthBias = rs.enableDepthBias;
3093         SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
3094     }
3095     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BIAS)) {
3096         if ((rs.depthBiasConstantFactor != crs.depthBiasConstantFactor) ||
3097             (rs.depthBiasSlopeFactor != crs.depthBiasSlopeFactor)) {
3098             crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
3099             crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
3100             glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
3101         }
3102         // depthBiasClamp NOT SUPPORTED    (needs an extension)
3103     }
3104     // If cull mode Flags change...
3105     if (rs.cullModeFlags != crs.cullModeFlags) {
3106         crs.cullModeFlags = rs.cullModeFlags;
3107         SetCullMode(crs);
3108     }
3109     auto frontFace = rs.frontFace;
3110     if (!renderingToDefaultFbo_) {
3111         // Flip winding when not rendering to the default fbo (the rendered image is y-flipped).
3112         if (frontFace == FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE) {
3113             frontFace = FrontFace::CORE_FRONT_FACE_CLOCKWISE;
3114         } else if (frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE) {
3115             frontFace = FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE;
3116         }
3117     }
3118     if (frontFace != crs.frontFace) {
3119         crs.frontFace = frontFace;
3120         SetFrontFace(crs);
3121     }
3122     if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_LINE_WIDTH)) {
3123         if (rs.lineWidth != crs.lineWidth) {
3124             crs.lineWidth = rs.lineWidth;
3125             glLineWidth(rs.lineWidth);
3126         }
3127     }
3128 }
3129 
3130 void RenderBackendGLES::DoGraphicsState(const GraphicsState& graphicsState)
3131 {
3132     /// GRAPHICSSTATE     inputAssembly
3133     const auto& ia = graphicsState.inputAssembly;
3134     if (ia.enablePrimitiveRestart != cacheState_.inputAssembly.enablePrimitiveRestart) {
3135         auto& cia = cacheState_.inputAssembly;
3136         cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
3137         SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
3138     }
3139     topology_ = ia.primitiveTopology;
3140     UpdateRasterizationState(graphicsState);
3141     UpdateDepthStencilState(graphicsState);
3142     UpdateBlendState(graphicsState);
3143 }
3144 
3145 void RenderBackendGLES::SetViewport(const RenderPassDesc::RenderArea& ra, const ViewportDesc& vd)
3146 {
3147     // NOTE: ViewportDesc is in floats.
3148     bool forceV = false;
3149     bool forceD = false;
3150     if (!viewportPrimed_) {
3151         viewportPrimed_ = true;
3152         forceV = true;
3153         forceD = true;
3154     }
3155     if ((vd.x != viewport_.x) || (vd.y != viewport_.y) || (vd.width != viewport_.width) ||
3156         (vd.height != viewport_.height)) {
3157         forceV = true;
3158     }
3159     if ((vd.minDepth != viewport_.minDepth) || (vd.maxDepth != viewport_.maxDepth)) {
3160         forceD = true;
3161     }
3162 
3163     if (forceV) {
3164         viewport_.x = vd.x;
3165         viewport_.y = vd.y;
3166         viewport_.width = vd.width;
3167         viewport_.height = vd.height;
3168         viewportUpdated_ = true;
3169     }
3170     if (forceD) {
3171         viewport_.minDepth = vd.minDepth;
3172         viewport_.maxDepth = vd.maxDepth;
3173         viewportDepthRangeUpdated_ = true;
3174     }
3175 }
3176 
3177 void RenderBackendGLES::SetScissor(const RenderPassDesc::RenderArea& ra, const ScissorDesc& sd)
3178 {
3179     // NOTE: ScissorDesc is in floats.
3180     bool force = false;
3181     if (!scissorPrimed_) {
3182         scissorPrimed_ = true;
3183         force = true;
3184     }
3185     if ((sd.offsetX != scissorBox_.offsetX) || (sd.offsetY != scissorBox_.offsetY) ||
3186         (sd.extentWidth != scissorBox_.extentWidth) || (sd.extentHeight != scissorBox_.extentHeight)) {
3187         force = true;
3188     }
3189     if (force) {
3190         scissorBox_ = sd;
3191         scissorBoxUpdated_ = true;
3192     }
3193 }
3194 
3195 void RenderBackendGLES::FlushViewportScissors()
3196 {
3197     if (!currentFrameBuffer_) {
3198         return;
3199     }
3200     bool force = false;
3201     if (scissorViewportSetDefaultFbo_ != renderingToDefaultFbo_) {
3202         force = true;
3203         scissorViewportSetDefaultFbo_ = renderingToDefaultFbo_;
3204     }
3205     if ((viewportUpdated_) || (force)) {
3206         viewportUpdated_ = false;
3207         // Handle top-left / bottom-left origin conversion
3208         PLUGIN_ASSERT(currentFrameBuffer_);
3209         auto y = static_cast<GLint>(viewport_.y);
3210         const auto h = static_cast<GLsizei>(viewport_.height);
3211         if (renderingToDefaultFbo_) {
3212             const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
3213             y = fh - (y + h);
3214         }
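        // Worked example (hypothetical values): with a 1080 px high default FBO, a
        // viewport of y = 100, height = 200 maps to glViewport y = 1080 - (100 + 200) = 780.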
3215         glViewport(static_cast<GLint>(viewport_.x), y, static_cast<GLsizei>(viewport_.width), h);
3216     }
3217     if ((scissorBoxUpdated_) || (force)) {
3218         scissorBoxUpdated_ = false;
3219         // Handle top-left / bottom-left origin conversion
3220         auto y = static_cast<GLint>(scissorBox_.offsetY);
3221         const auto h = static_cast<GLsizei>(scissorBox_.extentHeight);
3222         if (renderingToDefaultFbo_) {
3223             const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
3224             y = fh - (y + h);
3225         }
3226         glScissor(static_cast<GLint>(scissorBox_.offsetX), y, static_cast<GLsizei>(scissorBox_.extentWidth), h);
3227     }
3228     if (viewportDepthRangeUpdated_) {
3229         viewportDepthRangeUpdated_ = false;
3230         glDepthRangef(viewport_.minDepth, viewport_.maxDepth);
3231     }
3232 }
3233 RENDER_END_NAMESPACE()
3234