/*
 * Copyright (c) 2024 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "render_backend_gles.h"

#include <algorithm>

#if RENDER_HAS_GLES_BACKEND
#define EGL_EGLEXT_PROTOTYPES
#include <EGL/egl.h>
#include <EGL/eglext.h>
#undef EGL_EGLEXT_PROTOTYPES
#endif

#include <base/containers/fixed_string.h>
#include <render/datastore/render_data_store_render_pods.h> // NodeGraphBackbufferConfiguration...
#include <render/namespace.h>

#if (RENDER_PERF_ENABLED == 1)
#include <core/perf/cpu_perf_scope.h>
#include <core/perf/intf_performance_data_manager.h>

#include "perf/gpu_query.h"
#include "perf/gpu_query_manager.h"
#endif
#include "device/gpu_resource_manager.h"
#include "gles/device_gles.h"
#include "gles/gl_functions.h"
#include "gles/gpu_buffer_gles.h"
#include "gles/gpu_image_gles.h"
#include "gles/gpu_program_gles.h"
#include "gles/gpu_query_gles.h"
#include "gles/gpu_sampler_gles.h"
#include "gles/gpu_semaphore_gles.h"
#include "gles/node_context_descriptor_set_manager_gles.h"
#include "gles/node_context_pool_manager_gles.h"
#include "gles/pipeline_state_object_gles.h"
#include "gles/render_frame_sync_gles.h"
#include "gles/swapchain_gles.h"
#include "nodecontext/render_command_list.h"
#include "nodecontext/render_node_graph_node_store.h" // RenderCommandFrameData
#include "util/log.h"
#include "util/render_frame_util.h"

#define IS_BIT(value, bit) ((((value) & (bit)) == (bit)) ? true : false)
#define IS_BIT_GL(value, bit) ((((value) & (bit)) == (bit)) ? (GLboolean)GL_TRUE : (GLboolean)GL_FALSE)

using namespace BASE_NS;

// NOTE: implement missing commands, add state caching and clean up a bit more.
RENDER_BEGIN_NAMESPACE()
namespace Gles {
// Indices to colorBlendConstants
static constexpr uint32_t RED_INDEX = 0;
static constexpr uint32_t GREEN_INDEX = 1;
static constexpr uint32_t BLUE_INDEX = 2;
static constexpr uint32_t ALPHA_INDEX = 3;
static constexpr uint32_t CUBEMAP_LAYERS = 6;
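// Cached state for a single descriptor binding: the descriptor type plus one resource slot
// (buffer, image, or sampler) per array element. Used by the descriptor set bind cache below.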
struct Bind {
    DescriptorType descriptorType { CORE_DESCRIPTOR_TYPE_MAX_ENUM };
    struct BufferType {
        uint32_t bufferId;
        uint32_t offset;
        uint32_t size;
    };
    struct ImageType {
        GpuImageGLES* image;
        uint32_t mode;
        uint32_t mipLevel;
    };
    struct SamplerType {
        uint32_t samplerId;
    };
    struct Resource {
        union {
            Bind::BufferType buffer { 0, 0, 0 };
            Bind::ImageType image;
        };
        SamplerType sampler { 0 };
    };
    vector<Resource> resources;
};
} // namespace Gles
namespace {
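// Maps a descriptor type to the kind of GPU resource handle that backs it (sampler, image, or buffer).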
constexpr RenderHandleType GetRenderHandleType(const DescriptorType descriptorType)
{
    if (descriptorType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
        return RenderHandleType::GPU_SAMPLER;
    } else if (((descriptorType >= CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
                   (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE)) ||
               (descriptorType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
        return RenderHandleType::GPU_IMAGE;
    } else if ((descriptorType >= CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER) &&
               (descriptorType <= CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)) {
        return RenderHandleType::GPU_BUFFER;
    }
    return RenderHandleType::UNDEFINED;
}

constexpr GLenum LAYER_ID[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
    GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
    GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };

GLenum GetCubeMapTarget(GLenum type, uint32_t layer)
{
    if (type == GL_TEXTURE_CUBE_MAP) {
        PLUGIN_ASSERT_MSG(layer < Gles::CUBEMAP_LAYERS, "Invalid cubemap index %u", layer);
        return LAYER_ID[layer];
    }
    PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
    return GL_NONE;
}

GLenum GetTarget(GLenum type, uint32_t layer, uint32_t sampleCount)
{
    if (type == GL_TEXTURE_2D) {
        if (sampleCount > 1) {
            return GL_TEXTURE_2D_MULTISAMPLE;
        }
        return GL_TEXTURE_2D;
    }
    if (type == GL_TEXTURE_CUBE_MAP) {
        PLUGIN_ASSERT_MSG(sampleCount == 1, "Cubemap texture can't have MSAA");
        return GetCubeMapTarget(type, layer);
    }
    PLUGIN_ASSERT_MSG(false, "Unhandled type in getTarget! %x", type);
    return GL_NONE;
}

struct BlitArgs {
    uint32_t mipLevel {};
    Size3D rect0 {};
    Size3D rect1 {};
    uint32_t height {};
};

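// Blits a color rectangle between the currently bound read and draw framebuffers.
// The render pass rects use a top-left origin while GL uses a bottom-left origin, so the
// Y coordinates are remapped against the mip-adjusted attachment height before calling glBlitFramebuffer.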
void DoBlit(const Filter filter, const BlitArgs& src, const BlitArgs& dst)
{
    // Handle top-left / bottom-left origin conversion
    auto sy = static_cast<GLint>(src.rect0.height);
    const auto sh = static_cast<const GLint>(src.rect1.height);
    const auto sfh = static_cast<GLint>(src.height >> src.mipLevel);
    sy = sfh - (sy + sh);
    auto dy = static_cast<GLint>(dst.rect0.height);
    const auto dh = static_cast<const GLint>(dst.rect1.height);
    const auto dfh = static_cast<GLint>(dst.height >> dst.mipLevel);
    dy = dfh - (dy + dh);
    GLenum glfilter = GL_NEAREST;
    if (filter == CORE_FILTER_NEAREST) {
        glfilter = GL_NEAREST;
    } else if (filter == CORE_FILTER_LINEAR) {
        glfilter = GL_LINEAR;
    } else {
        PLUGIN_ASSERT_MSG(false, "RenderCommandBlitImage Invalid filter mode");
    }
    glBlitFramebuffer(static_cast<GLint>(src.rect0.width), sy, static_cast<GLint>(src.rect1.width), sfh,
        static_cast<GLint>(dst.rect0.width), dy, static_cast<GLint>(dst.rect1.width), dfh, GL_COLOR_BUFFER_BIT,
        glfilter);
}

GLenum GetPrimFromTopology(PrimitiveTopology op)
{
    switch (op) {
        case CORE_PRIMITIVE_TOPOLOGY_POINT_LIST:
            return GL_POINTS;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST:
            return GL_LINES;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP:
            return GL_LINE_STRIP;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
            return GL_TRIANGLES;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
            return GL_TRIANGLE_STRIP;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
            return GL_TRIANGLE_FAN;
#if defined(GL_ES_VERSION_3_2) || defined(GL_VERSION_3_2)
        // The following are valid after gles 3.2
        case CORE_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
            return GL_LINES_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
            return GL_LINE_STRIP_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
            return GL_TRIANGLES_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
            return GL_TRIANGLE_STRIP_ADJACENCY;
        case CORE_PRIMITIVE_TOPOLOGY_PATCH_LIST:
            return GL_PATCHES;
#endif
        default:
            PLUGIN_ASSERT_MSG(false, "Unsupported primitive topology");
            break;
    }
    return GL_POINTS;
}

GLenum GetBlendOp(BlendOp func)
{
    switch (func) {
        case CORE_BLEND_OP_ADD:
            return GL_FUNC_ADD;
        case CORE_BLEND_OP_SUBTRACT:
            return GL_FUNC_SUBTRACT;
        case CORE_BLEND_OP_REVERSE_SUBTRACT:
            return GL_FUNC_REVERSE_SUBTRACT;
        case CORE_BLEND_OP_MIN:
            return GL_MIN;
        case CORE_BLEND_OP_MAX:
            return GL_MAX;
        default:
            break;
    }
    return GL_FUNC_ADD;
}

GLenum GetBlendFactor(BlendFactor factor)
{
    switch (factor) {
        case CORE_BLEND_FACTOR_ZERO:
            return GL_ZERO;
        case CORE_BLEND_FACTOR_ONE:
            return GL_ONE;
        case CORE_BLEND_FACTOR_SRC_COLOR:
            return GL_SRC_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
            return GL_ONE_MINUS_SRC_COLOR;
        case CORE_BLEND_FACTOR_DST_COLOR:
            return GL_DST_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
            return GL_ONE_MINUS_DST_COLOR;
        case CORE_BLEND_FACTOR_SRC_ALPHA:
            return GL_SRC_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
            return GL_ONE_MINUS_SRC_ALPHA;
        case CORE_BLEND_FACTOR_DST_ALPHA:
            return GL_DST_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
            return GL_ONE_MINUS_DST_ALPHA;
        case CORE_BLEND_FACTOR_CONSTANT_COLOR:
            return GL_CONSTANT_COLOR;
        case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
            return GL_ONE_MINUS_CONSTANT_COLOR;
        case CORE_BLEND_FACTOR_CONSTANT_ALPHA:
            return GL_CONSTANT_ALPHA;
        case CORE_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
            return GL_ONE_MINUS_CONSTANT_ALPHA;
        case CORE_BLEND_FACTOR_SRC_ALPHA_SATURATE:
            return GL_SRC_ALPHA_SATURATE;
        // NOTE: check the GLES3.2...
        /* following requires EXT_blend_func_extended (dual source blending) */
        case CORE_BLEND_FACTOR_SRC1_COLOR:
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
        case CORE_BLEND_FACTOR_SRC1_ALPHA:
        case CORE_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
        default:
            break;
    }
    return GL_ONE;
}

GLenum GetCompareOp(CompareOp aOp)
{
    switch (aOp) {
        case CORE_COMPARE_OP_NEVER:
            return GL_NEVER;
        case CORE_COMPARE_OP_LESS:
            return GL_LESS;
        case CORE_COMPARE_OP_EQUAL:
            return GL_EQUAL;
        case CORE_COMPARE_OP_LESS_OR_EQUAL:
            return GL_LEQUAL;
        case CORE_COMPARE_OP_GREATER:
            return GL_GREATER;
        case CORE_COMPARE_OP_NOT_EQUAL:
            return GL_NOTEQUAL;
        case CORE_COMPARE_OP_GREATER_OR_EQUAL:
            return GL_GEQUAL;
        case CORE_COMPARE_OP_ALWAYS:
            return GL_ALWAYS;
        default:
            break;
    }
    return GL_ALWAYS;
}

GLenum GetStencilOp(StencilOp aOp)
{
    switch (aOp) {
        case CORE_STENCIL_OP_KEEP:
            return GL_KEEP;
        case CORE_STENCIL_OP_ZERO:
            return GL_ZERO;
        case CORE_STENCIL_OP_REPLACE:
            return GL_REPLACE;
        case CORE_STENCIL_OP_INCREMENT_AND_CLAMP:
            return GL_INCR;
        case CORE_STENCIL_OP_DECREMENT_AND_CLAMP:
            return GL_DECR;
        case CORE_STENCIL_OP_INVERT:
            return GL_INVERT;
        case CORE_STENCIL_OP_INCREMENT_AND_WRAP:
            return GL_INCR_WRAP;
        case CORE_STENCIL_OP_DECREMENT_AND_WRAP:
            return GL_DECR_WRAP;
        default:
            break;
    }
    return GL_KEEP;
}

void SetState(GLenum type, bool enabled)
{
    if (enabled) {
        glEnable(type);
    } else {
        glDisable(type);
    }
}

void SetCullMode(const GraphicsState::RasterizationState& rs)
{
    SetState(GL_CULL_FACE, (rs.cullModeFlags != CORE_CULL_MODE_NONE));

    switch (rs.cullModeFlags) {
        case CORE_CULL_MODE_FRONT_BIT:
            glCullFace(GL_FRONT);
            break;
        case CORE_CULL_MODE_BACK_BIT:
            glCullFace(GL_BACK);
            break;
        case CORE_CULL_MODE_FRONT_AND_BACK:
            glCullFace(GL_FRONT_AND_BACK);
            break;
        case CORE_CULL_MODE_NONE:
        default:
            break;
    }
}

void SetFrontFace(const GraphicsState::RasterizationState& rs)
{
    switch (rs.frontFace) {
        case CORE_FRONT_FACE_COUNTER_CLOCKWISE:
            glFrontFace(GL_CCW);
            break;
        case CORE_FRONT_FACE_CLOCKWISE:
            glFrontFace(GL_CW);
            break;
        default:
            break;
    }
}

#if RENDER_HAS_GL_BACKEND
void SetPolygonMode(const GraphicsState::RasterizationState& rs)
{
    GLenum mode;
    switch (rs.polygonMode) {
        default:
        case CORE_POLYGON_MODE_FILL:
            mode = GL_FILL;
            break;
        case CORE_POLYGON_MODE_LINE:
            mode = GL_LINE;
            break;
        case CORE_POLYGON_MODE_POINT:
            mode = GL_POINT;
            break;
    }
    glPolygonMode(GL_FRONT_AND_BACK, mode);
}
#endif

void Invalidate(GLenum framebuffer, int32_t count, const GLenum invalidate[], const RenderPassDesc& rpd,
    const LowlevelFramebufferGL& frameBuffer)
{
    if (count > 0) {
        if ((frameBuffer.width == rpd.renderArea.extentWidth) && (frameBuffer.height == rpd.renderArea.extentHeight)) {
            // Invalidate the whole buffer. (attachment sizes match render area)
            glInvalidateFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate);
        } else {
            // invalidate only a part of the render target..
            // NOTE: verify that this works, we might need to flip the Y axis the same way as scissors etc.
            const auto X = static_cast<const GLint>(rpd.renderArea.offsetX);
            const auto Y = static_cast<const GLint>(rpd.renderArea.offsetY);
            const auto W = static_cast<const GLsizei>(rpd.renderArea.extentWidth);
            const auto H = static_cast<const GLsizei>(rpd.renderArea.extentHeight);
            glInvalidateSubFramebuffer(framebuffer, static_cast<GLsizei>(count), invalidate, X, Y, W, H);
        }
    }
}

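// Arguments shared by the buffer-to-image copy helpers below: the destination image platform data and
// descriptor, the copy region, the source pointer/offset, and the byte size of one layer or slice.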
struct BlitData {
    const GpuImagePlatformDataGL& iPlat;
    const GpuImageDesc& imageDesc;
    const BufferImageCopy& bufferImageCopy;
    uintptr_t data { 0 };
    uint64_t size { 0 };
    uint64_t sizeOfData { 0 };
    bool compressed { false };
};

void BlitArray(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth };
    // NOTE: image offset depth is ignored
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height),
        Math::min(imageSize.z, bufferImageCopy.imageExtent.depth) };
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    if (valid) {
        uintptr_t data = bd.data;
        const uint32_t layerCount = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
        for (uint32_t layer = imageSubresource.baseArrayLayer; layer < layerCount; layer++) {
            const Math::UVec3 offset3D { offset.x, offset.y, layer };
            if (bd.compressed) {
                device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
            } else {
                device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
            }
            data += static_cast<ptrdiff_t>(bd.sizeOfData);
        }
    }
}

void Blit2D(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec2 imageSize { imageDesc.width >> mip, imageDesc.height >> mip };
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec2 extent { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height) };
    PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == 1,
        "RenderCommandCopyBufferImage Texture2D with baseArrayLayer!=0 && layerCount!= 1");
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    const uintptr_t data = bd.data;
    if (valid && bd.compressed) {
        device_.CompressedTexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent,
            iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
    } else if (valid) {
        device_.TexSubImage2D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset, extent, iPlat.format,
            iPlat.dataType, reinterpret_cast<const void*>(data));
    }
}

void Blit3D(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const auto& imageDesc = bd.imageDesc;
    const uint32_t mip = imageSubresource.mipLevel;
    const Math::UVec3 imageSize { imageDesc.width >> mip, imageDesc.height >> mip, imageDesc.depth >> mip };
    const Math::UVec3 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height,
        bufferImageCopy.imageOffset.depth };
    Math::UVec3 extent3D { Math::min(imageSize.x - offset.x, bufferImageCopy.imageExtent.width),
        Math::min(imageSize.y - offset.y, bufferImageCopy.imageExtent.height), Math::min(imageSize.z - offset.z, 1U) };
    const bool valid = (offset.x < imageSize.x) && (offset.y < imageSize.y);
    if (valid) {
        uintptr_t data = bd.data;
        for (uint32_t slice = 0U; slice < imageSize.z; ++slice) {
            const Math::UVec3 offset3D { offset.x, offset.y, slice };
            if (bd.compressed) {
                device_.CompressedTexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
            } else {
                device_.TexSubImage3D(iPlat.image, iPlat.type, imageSubresource.mipLevel, offset3D, extent3D,
                    iPlat.format, iPlat.dataType, reinterpret_cast<const void*>(data));
            }
            // offsets one slice
            data += static_cast<uintptr_t>(bd.sizeOfData);
        }
    }
}

void BlitCube(DeviceGLES& device_, const BlitData& bd)
{
    const auto& iPlat = bd.iPlat;
    const auto& bufferImageCopy = bd.bufferImageCopy;
    const auto& imageSubresource = bufferImageCopy.imageSubresource;
    const Math::UVec2 offset { bufferImageCopy.imageOffset.width, bufferImageCopy.imageOffset.height };
    const Math::UVec2 extent { bufferImageCopy.imageExtent.width, bufferImageCopy.imageExtent.height };
    constexpr GLenum faceId[] = { GL_TEXTURE_CUBE_MAP_POSITIVE_X, GL_TEXTURE_CUBE_MAP_NEGATIVE_X,
        GL_TEXTURE_CUBE_MAP_POSITIVE_Y, GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, GL_TEXTURE_CUBE_MAP_POSITIVE_Z,
        GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, 0 };
    PLUGIN_UNUSED(Gles::CUBEMAP_LAYERS);
    PLUGIN_ASSERT_MSG(imageSubresource.baseArrayLayer == 0 && imageSubresource.layerCount == Gles::CUBEMAP_LAYERS,
        "RenderCommandCopyBufferImage Cubemap with baseArrayLayer!=0 && layerCount!= 6");
    uintptr_t data = bd.data;
    const uint32_t lastLayer = imageSubresource.baseArrayLayer + imageSubresource.layerCount;
    for (uint32_t i = imageSubresource.baseArrayLayer; i < lastLayer; i++) {
        const GLenum face = faceId[i]; // convert layer index to cube map face id.
        if (face == 0) {
            // reached the end of cubemap faces (see faceId)
            // so must stop copying.
            break;
        }
        if (bd.compressed) {
            device_.CompressedTexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent,
                iPlat.internalFormat, static_cast<uint32_t>(bd.sizeOfData), reinterpret_cast<const void*>(data));
        } else {
            device_.TexSubImage2D(iPlat.image, face, imageSubresource.mipLevel, offset, extent, iPlat.format,
                iPlat.dataType, reinterpret_cast<const void*>(data));
        }
        data += static_cast<uintptr_t>(bd.sizeOfData);
    }
}

template<bool usePixelUnpackBuffer>
BlitData SetupBlit(DeviceGLES& device_, const BufferImageCopy& bufferImageCopy, GpuBufferGLES& srcGpuBuffer,
    const GpuImageGLES& dstGpuImage)
{
    const auto& iPlat = dstGpuImage.GetPlatformData();
    const auto& imageOffset = bufferImageCopy.imageOffset;
    PLUGIN_UNUSED(imageOffset);
    const auto& imageExtent = bufferImageCopy.imageExtent;
    // size is calculated for single layer / slice
    const uint64_t size = static_cast<uint64_t>(iPlat.bytesperpixel) *
                          static_cast<uint64_t>(bufferImageCopy.bufferImageHeight) *
                          static_cast<uint64_t>(bufferImageCopy.bufferRowLength);
    uintptr_t data = bufferImageCopy.bufferOffset;
    if constexpr (usePixelUnpackBuffer) {
        const auto& plat = srcGpuBuffer.GetPlatformData();
        device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, plat.buffer);
    } else {
        // Use the mapped pointer for glTexSubImage2D; this is a workaround for GL_INVALID_OPERATION on the PVR GLES
        // simulator and a crash with ETC2 textures on NVIDIA.
        data += reinterpret_cast<uintptr_t>(srcGpuBuffer.Map());
    }
    uint64_t sizeOfData = size;
    const auto& compinfo = iPlat.compression;
    if (compinfo.compressed) {
        // how many blocks in width
        const int64_t blockW = (imageExtent.width + (compinfo.blockW - 1)) / compinfo.blockW;
        // how many blocks in height
        const int64_t blockH = (imageExtent.height + (compinfo.blockH - 1)) / compinfo.blockH;
        // size in bytes..
        sizeOfData = static_cast<uint64_t>(((blockW * blockH) * compinfo.bytesperblock));

        // Warn for partial copies. We do not handle those at the moment.
        if (bufferImageCopy.bufferRowLength != 0) {
            if (bufferImageCopy.bufferRowLength != blockW * compinfo.blockW) {
                PLUGIN_LOG_W("Partial copies of compressed texture data are not currently supported. "
567 "Stride must match image width (with block align). "
568 "bufferImageCopy.bufferRowLength(%d) "
569 "imageExtent.width(%d) ",
570 bufferImageCopy.bufferRowLength, imageExtent.width);
571 }
572 }
573 glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
574 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0);
575 } else {
576 glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(bufferImageCopy.bufferRowLength));
577 glPixelStorei(GL_UNPACK_IMAGE_HEIGHT, static_cast<GLint>(bufferImageCopy.bufferImageHeight));
578 }
579 glPixelStorei(GL_UNPACK_ALIGNMENT, 1); // Make sure the align is tight.
580 return { iPlat, dstGpuImage.GetDesc(), bufferImageCopy, data, size, sizeOfData, compinfo.compressed };
581 }
582
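// Releases the upload source prepared by SetupBlit: unbinds the pixel unpack buffer or unmaps the staging buffer.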
template<bool usePixelUnpackBuffer>
void FinishBlit(DeviceGLES& device_, const GpuBufferGLES& srcGpuBuffer)
{
    if constexpr (usePixelUnpackBuffer) {
        device_.BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
    } else {
        srcGpuBuffer.Unmap();
    }
}

template<typename T, size_t N>
constexpr size_t Compare(const T (&a)[N], const T (&b)[N])
{
    for (size_t i = 0; i < N; i++) {
        if (a[i] != b[i])
            return false;
    }
    return true;
}

template<typename T, size_t N>
constexpr size_t Set(T (&a)[N], const T (&b)[N])
{
    for (size_t i = 0; i < N; i++) {
        a[i] = b[i];
    }
    return true;
}

bool CompareBlendFactors(
    const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    return (a.srcColorBlendFactor == b.srcColorBlendFactor) && (a.srcAlphaBlendFactor == b.srcAlphaBlendFactor) &&
           (a.dstColorBlendFactor == b.dstColorBlendFactor) && (a.dstAlphaBlendFactor == b.dstAlphaBlendFactor);
}

void SetBlendFactors(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    a.srcColorBlendFactor = b.srcColorBlendFactor;
    a.srcAlphaBlendFactor = b.srcAlphaBlendFactor;
    a.dstColorBlendFactor = b.dstColorBlendFactor;
    a.dstAlphaBlendFactor = b.dstAlphaBlendFactor;
}

bool CompareBlendOps(
    const GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    return (a.colorBlendOp == b.colorBlendOp) && (a.alphaBlendOp == b.alphaBlendOp);
}

void SetBlendOps(GraphicsState::ColorBlendState::Attachment& a, const GraphicsState::ColorBlendState::Attachment& b)
{
    a.colorBlendOp = b.colorBlendOp;
    a.alphaBlendOp = b.alphaBlendOp;
}

bool CompareStencilOp(const GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    return (a.failOp == b.failOp) && (a.depthFailOp == b.depthFailOp) && (a.passOp == b.passOp);
}

void SetStencilOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    a.failOp = b.failOp;
    a.depthFailOp = b.depthFailOp;
    a.passOp = b.passOp;
}

void SetStencilCompareOp(GraphicsState::StencilOpState& a, const GraphicsState::StencilOpState& b)
{
    a.compareOp = b.compareOp;
    a.compareMask = b.compareMask;
    a.reference = b.reference;
}

#if RENDER_VALIDATION_ENABLED
void ValidateCopyImage(const Offset3D& offset, const Size3D& extent, uint32_t mipLevel, const GpuImageDesc& imageDesc)
{
    if (mipLevel >= imageDesc.mipCount) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage mipLevel must be less than image mipCount.");
    }
    if ((offset.x < 0) || (offset.y < 0) || (offset.z < 0)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset must not be negative.");
    }
    if (((offset.x + extent.width) > imageDesc.width) || ((offset.y + extent.height) > imageDesc.height) ||
        ((offset.z + extent.depth) > imageDesc.depth)) {
        PLUGIN_LOG_W("RENDER_VALIDATION: CopyImage offset + extent does not fit in image.");
    }
}

void ValidateCopyImage(const ImageCopy& imageCopy, const GpuImageDesc& srcImageDesc, const GpuImageDesc& dstImageDesc)
{
    ValidateCopyImage(imageCopy.srcOffset, imageCopy.extent, imageCopy.srcSubresource.mipLevel, srcImageDesc);
    ValidateCopyImage(imageCopy.dstOffset, imageCopy.extent, imageCopy.dstSubresource.mipLevel, dstImageDesc);
}
#endif

constexpr void ClampOffset(int32_t& srcOffset, int32_t& dstOffset, uint32_t& size)
{
    if (srcOffset < 0) {
        auto iSize = static_cast<int32_t>(size);
        size = static_cast<uint32_t>(iSize + srcOffset);
        dstOffset -= srcOffset;
        srcOffset = 0;
    }
}

constexpr void ClampOffset(Offset3D& srcOffset, Offset3D& dstOffset, Size3D& size)
{
    ClampOffset(srcOffset.x, dstOffset.x, size.width);
    ClampOffset(srcOffset.y, dstOffset.y, size.height);
    ClampOffset(srcOffset.z, dstOffset.z, size.depth);
}

constexpr void ClampSize(int32_t offset, uint32_t maxSize, uint32_t& size)
{
    if (size > static_cast<uint32_t>(static_cast<int32_t>(maxSize) - offset)) {
        size = static_cast<uint32_t>(static_cast<int32_t>(maxSize) - offset);
    }
}

constexpr void ClampSize(const Offset3D& offset, const GpuImageDesc& desc, Size3D& size)
{
    ClampSize(offset.x, desc.width, size.width);
    ClampSize(offset.y, desc.height, size.height);
    ClampSize(offset.z, desc.depth, size.depth);
}

// helper which covers barriers supported by Barrier and BarrierByRegion
constexpr GLbitfield CommonBarrierBits(AccessFlags accessFlags, RenderHandleType resourceType)
{
    GLbitfield barriers = 0;
    if (accessFlags & CORE_ACCESS_UNIFORM_READ_BIT) {
        barriers |= GL_UNIFORM_BARRIER_BIT;
    }
    if (accessFlags & CORE_ACCESS_SHADER_READ_BIT) {
        // shader read covers UBO, SSBO, storage image etc. use resource type to limit the options.
        if (resourceType == RenderHandleType::GPU_IMAGE) {
            barriers |= GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        } else if (resourceType == RenderHandleType::GPU_BUFFER) {
            barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
        } else {
            barriers |= GL_UNIFORM_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT |
                        GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        }
    }
    if (accessFlags & CORE_ACCESS_SHADER_WRITE_BIT) {
        if (resourceType == RenderHandleType::GPU_IMAGE) {
            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT;
        } else if (resourceType == RenderHandleType::GPU_BUFFER) {
            barriers |= GL_SHADER_STORAGE_BARRIER_BIT;
        } else {
            barriers |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_SHADER_STORAGE_BARRIER_BIT;
        }
    }
    if (accessFlags & (CORE_ACCESS_INPUT_ATTACHMENT_READ_BIT | CORE_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                          CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT)) {
        barriers |= GL_FRAMEBUFFER_BARRIER_BIT;
    }
    // GL_ATOMIC_COUNTER_BARRIER_BIT is not used at the moment
    return barriers;
}

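// Looks up a binding's arrayOffset from the per-type (buffer/image/sampler) descriptor resource lists.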
constexpr uint32_t GetArrayOffset(
    const DescriptorSetLayoutBindingResourcesHandler& data, const DescriptorSetLayoutBindingResource& res)
{
    const RenderHandleType type = GetRenderHandleType(res.binding.descriptorType);
    if (type == RenderHandleType::GPU_BUFFER) {
        return data.buffers[res.resourceIndex].desc.arrayOffset;
    }
    if (type == RenderHandleType::GPU_IMAGE) {
        return data.images[res.resourceIndex].desc.arrayOffset;
    }
    if (type == RenderHandleType::GPU_SAMPLER) {
        return data.samplers[res.resourceIndex].desc.arrayOffset;
    }
    return 0u;
}
} // namespace

RenderBackendGLES::RenderBackendGLES(Device& device, GpuResourceManager& gpuResourceManager)
    : RenderBackend(), device_(static_cast<DeviceGLES&>(device)), gpuResourceMgr_(gpuResourceManager)
{
#if (RENDER_PERF_ENABLED == 1)
    validGpuQueries_ = false;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    gpuQueryMgr_ = make_unique<GpuQueryManager>();
#if RENDER_HAS_GL_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGL) {
        validGpuQueries_ = true;
    }
#endif
#if RENDER_HAS_GLES_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
        // Check if GL_EXT_disjoint_timer_query is available.
        validGpuQueries_ = device_.HasExtension("GL_EXT_disjoint_timer_query");
    }
#endif
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
#endif // RENDER_PERF_ENABLED
#if RENDER_HAS_GLES_BACKEND
    if (device_.GetBackendType() == DeviceBackendType::OPENGLES) {
        multisampledRenderToTexture_ = device_.HasExtension("GL_EXT_multisampled_render_to_texture2");
    }
#endif
    PLUGIN_ASSERT(device_.IsActive());
    PrimeCache(GraphicsState {}); // Initializes cache.
    glGenFramebuffers(1, &blitImageSourceFbo_);
    glGenFramebuffers(1, &blitImageDestinationFbo_);
#if (RENDER_DEBUG_GPU_RESOURCE_IDS == 1)
    PLUGIN_LOG_D("fbo id >: %u", blitImageSourceFbo_);
    PLUGIN_LOG_D("fbo id >: %u", blitImageDestinationFbo_);
#endif
#if !RENDER_HAS_GLES_BACKEND
    glEnable(GL_PROGRAM_POINT_SIZE);
#endif
}

RenderBackendGLES::~RenderBackendGLES()
{
    PLUGIN_ASSERT(device_.IsActive());
    device_.DeleteFrameBuffer(blitImageSourceFbo_);
    device_.DeleteFrameBuffer(blitImageDestinationFbo_);
}

void RenderBackendGLES::Present(const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (!backBufferConfig.swapchainData.empty()) {
        if (device_.HasSwapchain()) {
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.Begin();
#endif
            for (const auto& swapchainData : backBufferConfig.swapchainData) {
#if (RENDER_DEV_ENABLED == 1)
                if (swapchainData.config.gpuSemaphoreHandle) {
                    // NOTE: not implemented
                    PLUGIN_LOG_E("NodeGraphBackBufferConfiguration semaphore not signaled");
                }
#endif
                const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapchainData.handle));
                if (swp) {
#if RENDER_GL_FLIP_Y_SWAPCHAIN
                    // Blit and flip our swapchain frame to backbuffer..
                    const auto& sdesc = swp->GetDesc();
                    if (scissorEnabled_) {
                        glDisable(GL_SCISSOR_TEST);
                        scissorEnabled_ = false;
                    }
                    const auto& platSwapchain = swp->GetPlatformData();
                    device_.BindReadFrameBuffer(platSwapchain.fbos[presentationInfo_.swapchainImageIndex]);
                    device_.BindWriteFrameBuffer(0); // FBO 0 is the surface bound to current context..
                    glBlitFramebuffer(0, 0, (GLint)sdesc.width, (GLint)sdesc.height, 0, (GLint)sdesc.height,
                        (GLint)sdesc.width, 0, GL_COLOR_BUFFER_BIT, GL_NEAREST);
                    device_.BindReadFrameBuffer(0);
#endif
                    device_.SwapBuffers(*swp);
                }
            }
#if (RENDER_PERF_ENABLED == 1)
            commonCpuTimers_.present.End();
#endif
        }
    }
}

void RenderBackendGLES::ResetState()
{
    boundProgram_ = {};
    boundIndexBuffer_ = {};
    vertexAttribBinds_ = 0;
    renderingToDefaultFbo_ = false;
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = nullptr;
    currentPsoHandle_ = {};
    renderArea_ = {};
    activeRenderPass_ = {};
    currentSubPass_ = 0;
    currentFrameBuffer_ = nullptr;
    scissorBoxUpdated_ = viewportDepthRangeUpdated_ = viewportUpdated_ = true;
    inRenderpass_ = 0;
}

void RenderBackendGLES::ResetBindings()
{
    for (auto& b : boundObjects_) {
        b.dirty = true;
    }
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = nullptr;
    currentPsoHandle_ = {};
}

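// Backend entry point for a frame: acquires and remaps swapchain images, resets the cached GL state,
// executes every recorded command list, and then finalizes frame fences and external GPU signals.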
void RenderBackendGLES::Render(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    // NOTE: all command lists are validated before entering here
    PLUGIN_ASSERT(device_.IsActive());
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.Begin();
    commonCpuTimers_.acquire.Begin();
#endif
    presentationInfo_ = {};

    if (device_.HasSwapchain() && (!backBufferConfig.swapchainData.empty())) {
        for (const auto& swapData : backBufferConfig.swapchainData) {
            if (const auto* swp = static_cast<const SwapchainGLES*>(device_.GetSwapchain(swapData.handle))) {
                presentationInfo_.swapchainImageIndex = swp->GetNextImage();
                const Device::SwapchainData swapchainData = device_.GetSwapchainData(swapData.handle);
                if (presentationInfo_.swapchainImageIndex < swapchainData.imageViewCount) {
                    // remap image to backbuffer
                    const RenderHandle currentSwapchainHandle =
                        swapchainData.imageViews[presentationInfo_.swapchainImageIndex];
                    // special swapchain remapping
                    gpuResourceMgr_.RenderBackendImmediateRemapGpuImageHandle(swapData.handle, currentSwapchainHandle);
                }
            }
        }
    }
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.acquire.End();

    StartFrameTimers(renderCommandFrameData);
    commonCpuTimers_.execute.Begin();
#endif
    // global begin backend frame
    auto& descriptorSetMgr = (DescriptorSetManagerGles&)device_.GetDescriptorSetManager();
    descriptorSetMgr.BeginBackendFrame();

    // Reset bindings.
    ResetState();
    for (const auto& ref : renderCommandFrameData.renderCommandContexts) {
        // Reset bindings between command lists..
        ResetBindings();
        RenderSingleCommandList(ref);
    }
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.execute.End();
#endif
    RenderProcessEndCommandLists(renderCommandFrameData, backBufferConfig);
#if (RENDER_PERF_ENABLED == 1)
    commonCpuTimers_.full.End();
    EndFrameTimers();
#endif
}

void RenderBackendGLES::RenderProcessEndCommandLists(
    RenderCommandFrameData& renderCommandFrameData, const RenderBackendBackBufferConfiguration& backBufferConfig)
{
    if (auto* frameSync = static_cast<RenderFrameSyncGLES*>(renderCommandFrameData.renderFrameSync); frameSync) {
        frameSync->GetFrameFence();
    }
    // signal external GPU fences
    if (renderCommandFrameData.renderFrameUtil && renderCommandFrameData.renderFrameUtil->HasGpuSignals()) {
        auto externalSignals = renderCommandFrameData.renderFrameUtil->GetFrameGpuSignalData();
        const auto externalSemaphores = renderCommandFrameData.renderFrameUtil->GetGpuSemaphores();
        PLUGIN_ASSERT(externalSignals.size() == externalSemaphores.size());
        if (externalSignals.size() == externalSemaphores.size()) {
            for (size_t sigIdx = 0; sigIdx < externalSignals.size(); ++sigIdx) {
                // needs to be false
                if (!externalSignals[sigIdx].signaled && (externalSemaphores[sigIdx])) {
                    if (const auto* gs = (const GpuSemaphoreGles*)externalSemaphores[sigIdx].get(); gs) {
                        auto& plat = const_cast<GpuSemaphorePlatformDataGles&>(gs->GetPlatformData());
                        // NOTE: currently could create only one GPU sync
#if RENDER_HAS_GLES_BACKEND
                        const auto disp =
                            static_cast<const DevicePlatformDataGLES&>(device_.GetEglState().GetPlatformData())
                                .display;
                        EGLSyncKHR sync = eglCreateSyncKHR(disp, EGL_SYNC_NATIVE_FENCE_ANDROID, nullptr);
                        if (sync == EGL_NO_SYNC_KHR) {
                            PLUGIN_LOG_E("eglCreateSyncKHR fail");
                        }
#elif RENDER_HAS_GL_BACKEND
                        GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
#else
                        uint64_t sync = 0;
                        PLUGIN_LOG_E("no supported backend to create fence");
#endif
                        plat.sync = static_cast<uint64_t>(reinterpret_cast<uintptr_t>(sync));
                        externalSignals[sigIdx].gpuSignalResourceHandle = plat.sync;
                        externalSignals[sigIdx].signaled = true;

                        // NOTE: client is expected to add code for the wait with glClientWaitSync(sync, X, 0)
                    }
                }
            }
        }
    }
}

void RenderBackendGLES::RenderCommandUndefined(const RenderCommandWithType& renderCommand)
{
    PLUGIN_ASSERT_MSG(false, "non-valid render command");
}

void RenderBackendGLES::RenderSingleCommandList(const RenderCommandContext& renderCommandCtx)
{
    // these are validated in render graph
    managers_ = { renderCommandCtx.nodeContextPsoMgr, renderCommandCtx.nodeContextPoolMgr,
        renderCommandCtx.nodeContextDescriptorSetMgr, renderCommandCtx.renderBarrierList };

    managers_.poolMgr->BeginBackendFrame();
    managers_.psoMgr->BeginBackendFrame();
#if (RENDER_PERF_ENABLED == 1) || (RENDER_DEBUG_MARKERS_ENABLED == 1)
    const auto& debugName = renderCommandCtx.debugName;
#endif
#if (RENDER_PERF_ENABLED == 1)
    perfCounters_ = {};
    PLUGIN_ASSERT(timers_.count(debugName) == 1);
    PerfDataSet& perfDataSet = timers_[debugName];
    perfDataSet.cpuTimer.Begin();
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (validGpuQueries_) {
#ifdef GL_GPU_DISJOINT_EXT
        /* Clear disjoint error */
        GLint disjointOccurred = 0;
        glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
#endif
        GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
        PLUGIN_ASSERT(gpuQuery);

        const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
        PLUGIN_ASSERT(platData.queryObject);
        glBeginQuery(GL_TIME_ELAPSED_EXT, platData.queryObject);
    }
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
#endif // RENDER_PERF_ENABLED
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)debugName.data());
#endif
    commandListValid_ = true;
    for (const auto& ref : renderCommandCtx.renderCommandList->GetRenderCommands()) {
        PLUGIN_ASSERT(ref.rc);
        if (commandListValid_) {
#if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
            glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, (const GLchar*)COMMAND_NAMES[(uint32_t)ref.type]);
#endif
            (this->*(COMMAND_HANDLERS[static_cast<uint32_t>(ref.type)]))(ref);
#if RENDER_DEBUG_COMMAND_MARKERS_ENABLED
            glPopDebugGroup();
#endif
        }
    }
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPopDebugGroup();
#endif
#if (RENDER_PERF_ENABLED == 1)
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    if (validGpuQueries_) {
        glEndQuery(GL_TIME_ELAPSED_EXT);
    }
#endif // RENDER_GPU_TIMESTAMP_QUERIES_ENABLED
    perfDataSet.cpuTimer.End();
    CopyPerfTimeStamp(debugName, perfDataSet);
#endif // RENDER_PERF_ENABLED
}

void RenderBackendGLES::RenderCommandBindPipeline(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_PIPELINE);
    const auto& renderCmd = *static_cast<const struct RenderCommandBindPipeline*>(ref.rc);
    boundProgram_ = {};
    if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_COMPUTE) {
        PLUGIN_ASSERT(currentFrameBuffer_ == nullptr);
        BindComputePipeline(renderCmd);
    } else if (renderCmd.pipelineBindPoint == PipelineBindPoint::CORE_PIPELINE_BIND_POINT_GRAPHICS) {
        BindGraphicsPipeline(renderCmd);
    }
    currentPsoHandle_ = renderCmd.psoHandle;
}

void RenderBackendGLES::BindComputePipeline(const struct RenderCommandBindPipeline& renderCmd)
{
    const auto* pso = static_cast<const ComputePipelineStateObjectGLES*>(
        managers_.psoMgr->GetComputePso(renderCmd.psoHandle, nullptr));
    if (pso) {
        const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
        // Setup descriptorset bind cache..
        SetupCache(data.pipelineLayout);
    }
    boundComputePipeline_ = pso;
    boundGraphicsPipeline_ = nullptr;
}

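// Resizes the per-set descriptor bind cache to match the given pipeline layout. Sets whose binding layout
// or descriptor counts changed are marked dirty so their resources are rebound before the next draw/dispatch.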
void RenderBackendGLES::SetupCache(const PipelineLayout& pipelineLayout)
{
    // based on pipeline layout. (note that compatible sets should "save state")
    for (uint32_t set = 0; set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT; ++set) {
        // mark unmatching sets dirty (all for now)
        // resize the cache structures.
        const auto& s = pipelineLayout.descriptorSetLayouts[set];
        if (s.set == PipelineLayoutConstants::INVALID_INDEX) {
            boundObjects_[set].dirty = true;
#if RENDER_HAS_GLES_BACKEND
            boundObjects_[set].oesBinds.clear();
#endif
            boundObjects_[set].resources.clear();
            continue;
        }
        PLUGIN_ASSERT(s.set == set);

        uint32_t maxB = 0;
        // NOTE: compatibility optimizations?
        // NOTE: we expect bindings to be sorted.
        if (s.bindings.back().binding == s.bindings.size() - 1U) {
            // since the last binding matches the size, expect it to be continuous.
            maxB = static_cast<uint32_t>(s.bindings.size());
        } else {
            // Sparse binding.
            // NOTE: sparse sets will waste memory here. (see notes in
            // https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkDescriptorSetLayoutBinding.html)
            for (const auto& bind : s.bindings) {
                maxB = Math::max(maxB, bind.binding);
            }
            maxB += 1U; // zero based bindings..
        }
        if (boundObjects_[set].resources.size() != maxB) {
            // resource count change.. (so it's dirty then)
            boundObjects_[set].dirty = true;
#if RENDER_HAS_GLES_BACKEND
            boundObjects_[set].oesBinds.clear();
#endif
            boundObjects_[set].resources.clear(); // clear because we don't care what it had before.
            boundObjects_[set].resources.resize(maxB);
        }

        for (const auto& b : s.bindings) {
            auto& o = boundObjects_[set].resources[b.binding];
            // ignore b.shaderStageFlags for now.
            if ((o.resources.size() != b.descriptorCount) || (o.descriptorType != b.descriptorType)) {
                // mark set dirty, since "not matching"
                o.resources.clear();
                o.resources.resize(b.descriptorCount);
                o.descriptorType = b.descriptorType;
                boundObjects_[set].dirty = true;
#if RENDER_HAS_GLES_BACKEND
                boundObjects_[set].oesBinds.clear();
#endif
            }
        }
    }
}

void RenderBackendGLES::BindGraphicsPipeline(const struct RenderCommandBindPipeline& renderCmd)
{
    const auto* pso = static_cast<const GraphicsPipelineStateObjectGLES*>(
        managers_.psoMgr->GetGraphicsPso(renderCmd.psoHandle, activeRenderPass_.renderPassDesc,
            activeRenderPass_.subpasses, activeRenderPass_.subpassStartIndex, 0, nullptr, nullptr));
    if (pso) {
        const auto& data = static_cast<const PipelineStateObjectPlatformDataGL&>(pso->GetPlatformData());
        dynamicStateFlags_ = data.dynamicStateFlags;
        DoGraphicsState(data.graphicsState);
        // NOTE: Deprecate (default viewport/scissor should be set from default targets at some point)
        if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_VIEWPORT)) {
            SetViewport(renderArea_, ViewportDesc { 0.0f, 0.0f, static_cast<float>(renderArea_.extentWidth),
                                        static_cast<float>(renderArea_.extentHeight), 0.0f, 1.0f });
        }
        if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_SCISSOR)) {
            SetScissor(renderArea_, ScissorDesc { 0, 0, renderArea_.extentWidth, renderArea_.extentHeight });
        }
        // Setup descriptorset bind cache..
        SetupCache(data.pipelineLayout);
    }
    boundComputePipeline_ = nullptr;
    boundGraphicsPipeline_ = pso;
}

void RenderBackendGLES::RenderCommandDraw(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW);
    const auto& renderCmd = *static_cast<struct RenderCommandDraw*>(ref.rc);
    if (!boundGraphicsPipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
    BindResources();
    const auto type = GetPrimFromTopology(topology_);
    const auto instanceCount = static_cast<GLsizei>(renderCmd.instanceCount);
    // firstInstance is not supported yet, need to set the SPIRV_Cross generated uniform
    // "SPIRV_Cross_BaseInstance" to renderCmd.firstInstance;
    if (renderCmd.indexCount) {
        uintptr_t offsetp = boundIndexBuffer_.offset;
        GLenum indexType = GL_UNSIGNED_SHORT;
        switch (boundIndexBuffer_.type) {
            case CORE_INDEX_TYPE_UINT16:
                offsetp += renderCmd.firstIndex * sizeof(uint16_t);
                indexType = GL_UNSIGNED_SHORT;
                break;
            case CORE_INDEX_TYPE_UINT32:
                offsetp += renderCmd.firstIndex * sizeof(uint32_t);
                indexType = GL_UNSIGNED_INT;
                break;
            default:
                PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
                break;
        }
        const auto indexCount = static_cast<const GLsizei>(renderCmd.indexCount);
        const auto vertexOffset = static_cast<const GLsizei>(renderCmd.vertexOffset);
        const void* offset = reinterpret_cast<const void*>(offsetp);
        if (renderCmd.instanceCount > 1) {
            if (vertexOffset) {
                glDrawElementsInstancedBaseVertex(type, indexCount, indexType, offset, instanceCount, vertexOffset);
            } else {
                glDrawElementsInstanced(type, indexCount, indexType, offset, instanceCount);
            }
        } else {
            if (vertexOffset) {
                glDrawElementsBaseVertex(type, indexCount, indexType, offset, vertexOffset);
            } else {
                glDrawElements(type, indexCount, indexType, offset);
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.drawCount;
        perfCounters_.instanceCount += renderCmd.instanceCount;
        perfCounters_.triangleCount += renderCmd.indexCount * renderCmd.instanceCount;
#endif
    } else {
        const auto firstVertex = static_cast<const GLsizei>(renderCmd.firstVertex);
        const auto vertexCount = static_cast<const GLsizei>(renderCmd.vertexCount);
        if (renderCmd.instanceCount > 1) {
            glDrawArraysInstanced(type, firstVertex, vertexCount, instanceCount);
        } else {
            glDrawArrays(type, firstVertex, vertexCount);
        }
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.drawCount;
        perfCounters_.instanceCount += renderCmd.instanceCount;
        perfCounters_.triangleCount += (renderCmd.vertexCount * 3) * renderCmd.instanceCount; // 3: vertex dimension
#endif
    }
}

void RenderBackendGLES::RenderCommandDrawIndirect(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DRAW_INDIRECT);
    const auto& renderCmd = *static_cast<const struct RenderCommandDrawIndirect*>(ref.rc);
    if (!boundGraphicsPipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
    BindResources();
    if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
        const auto& plat = gpuBuffer->GetPlatformData();
        device_.BindBuffer(GL_DRAW_INDIRECT_BUFFER, plat.buffer);
        const auto type = GetPrimFromTopology(topology_);
        auto offset = static_cast<GLintptr>(renderCmd.offset);
        if (renderCmd.drawType == DrawType::DRAW_INDEXED_INDIRECT) {
            GLenum indexType = GL_UNSIGNED_SHORT;
            switch (boundIndexBuffer_.type) {
                case CORE_INDEX_TYPE_UINT16:
                    indexType = GL_UNSIGNED_SHORT;
                    break;
                case CORE_INDEX_TYPE_UINT32:
                    indexType = GL_UNSIGNED_INT;
                    break;
                default:
                    PLUGIN_ASSERT_MSG(false, "Invalid indexbuffer type");
                    break;
            }
            for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
                glDrawElementsIndirect(type, indexType, reinterpret_cast<const void*>(offset));
                offset += renderCmd.stride;
            }
        } else {
            for (uint32_t i = 0; i < renderCmd.drawCount; ++i) {
                glDrawArraysIndirect(type, reinterpret_cast<const void*>(offset));
                offset += renderCmd.stride;
            }
        }
#if (RENDER_PERF_ENABLED == 1)
        perfCounters_.drawIndirectCount += renderCmd.drawCount;
#endif
    }
}

void RenderBackendGLES::RenderCommandDispatch(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH);
    const auto& renderCmd = *static_cast<const struct RenderCommandDispatch*>(ref.rc);
    if (!boundComputePipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
    BindResources();
    glDispatchCompute(renderCmd.groupCountX, renderCmd.groupCountY, renderCmd.groupCountZ);
#if (RENDER_PERF_ENABLED == 1)
    ++perfCounters_.dispatchCount;
#endif
}

void RenderBackendGLES::RenderCommandDispatchIndirect(const RenderCommandWithType& ref)
{
    PLUGIN_ASSERT(ref.type == RenderCommandType::DISPATCH_INDIRECT);
    const auto& renderCmd = *static_cast<const struct RenderCommandDispatchIndirect*>(ref.rc);
    if (!boundComputePipeline_) {
        return;
    }
    PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
    BindResources();
    if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.argsHandle); gpuBuffer) {
        const auto& plat = gpuBuffer->GetPlatformData();
        device_.BindBuffer(GL_DISPATCH_INDIRECT_BUFFER, plat.buffer);
        glDispatchComputeIndirect(static_cast<GLintptr>(renderCmd.offset));
#if (RENDER_PERF_ENABLED == 1)
        ++perfCounters_.dispatchIndirectCount;
#endif
    }
}

void RenderBackendGLES::ClearScissorInit(const RenderPassDesc::RenderArea& aArea)
{
    resetScissor_ = false;   // need to reset scissor state after clear?
    clearScissorSet_ = true; // need to setup clear scissors before clear?
    clearScissor_ = aArea;   // area to be cleared
    if (scissorPrimed_) {    // have scissors been set yet?
        if ((!scissorBoxUpdated_) && // if there is a pending scissor change, ignore the scissorbox.
            (clearScissor_.offsetX == scissorBox_.offsetX) && (clearScissor_.offsetY == scissorBox_.offsetY) &&
            (clearScissor_.extentWidth == scissorBox_.extentWidth) &&
            (clearScissor_.extentHeight == scissorBox_.extentHeight)) {
            // Current scissors match clearscissor area, so no need to set it again.
            clearScissorSet_ = false;
        }
    }
}

void RenderBackendGLES::ClearScissorSet()
{
    if (clearScissorSet_) {       // do we need to set clear scissors.
        clearScissorSet_ = false; // clear scissors have been set now.
        resetScissor_ = true;     // we are modifying scissors, so remember to reset them afterwards.
        glScissor(static_cast<GLint>(clearScissor_.offsetX), static_cast<GLint>(clearScissor_.offsetY),
            static_cast<GLsizei>(clearScissor_.extentWidth), static_cast<GLsizei>(clearScissor_.extentHeight));
    }
}

void RenderBackendGLES::ClearScissorReset()
{
    if (resetScissor_) { // need to reset correct scissors?
        if (!scissorPrimed_) {
            // scissors have not been set yet, so use clearbox as current cache state (and don't change scissor
            // setting)
            scissorPrimed_ = true;
            scissorBox_.offsetX = clearScissor_.offsetX;
            scissorBox_.offsetY = clearScissor_.offsetY;
            scissorBox_.extentHeight = clearScissor_.extentHeight;
            scissorBox_.extentWidth = clearScissor_.extentWidth;
        } else {
            // Restore scissor box to cached state. (update scissors when needed, since clearBox != scissorBox)
            scissorBoxUpdated_ = true; // ie. request to update scissor state.
        }
    }
}

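// Emulates render pass loadOp CLEAR for color attachments: the color write mask is temporarily forced to all
// channels, the attachment is cleared with glClearBufferfv, and the cached mask from the pipeline state is restored.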
HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc * > colorAttachments)1337 void RenderBackendGLES::HandleColorAttachments(const array_view<const RenderPassDesc::AttachmentDesc*> colorAttachments)
1338 {
1339 constexpr ColorComponentFlags clearAll = CORE_COLOR_COMPONENT_R_BIT | CORE_COLOR_COMPONENT_G_BIT |
1340 CORE_COLOR_COMPONENT_B_BIT | CORE_COLOR_COMPONENT_A_BIT;
1341 const auto& cBlend = cacheState_.colorBlendState;
1342 for (uint32_t idx = 0; idx < colorAttachments.size(); ++idx) {
1343 if (colorAttachments[idx] == nullptr) {
1344 continue;
1345 }
1346 const auto& ref = *(colorAttachments[idx]);
1347 if (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR) {
1348 const auto& cBlendState = cBlend.colorAttachments[idx];
1349 if (clearAll != cBlendState.colorWriteMask) {
1350 glColorMaski(idx, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
1351 }
1352 ClearScissorSet();
1353 // glClearBufferfv only for float formats?
1354 // glClearBufferiv & glClearbufferuv only for integer formats?
1355 glClearBufferfv(GL_COLOR, static_cast<GLint>(idx), ref.clearValue.color.float32);
1356 if (clearAll != cBlendState.colorWriteMask) {
1357 // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
1358 glColorMaski(idx, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
1359 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
1360 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
1361 IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
1362 }
1363 }
1364 }
1365 }
1366
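// Clears depth and/or stencil when the corresponding loadOp is CLEAR, temporarily enabling the
// depth/stencil write masks if the cached state has them disabled.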
1367 void RenderBackendGLES::HandleDepthAttachment(const RenderPassDesc::AttachmentDesc& depthAttachment)
1368 {
1369 const GLuint allBits = 0xFFFFFFFFu;
1370 const auto& ref = depthAttachment;
1371 const bool clearDepth = (ref.loadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1372 const bool clearStencil = (ref.stencilLoadOp == AttachmentLoadOp::CORE_ATTACHMENT_LOAD_OP_CLEAR);
1373 // Change state if needed.
1374 if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1375 glDepthMask(GL_TRUE);
1376 }
1377 if (clearStencil) {
1378 if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1379 glStencilMaskSeparate(GL_FRONT, allBits);
1380 }
1381 if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1382 glStencilMaskSeparate(GL_BACK, allBits);
1383 }
1384 }
1385 if (clearDepth || clearStencil) {
1386 // Set the scissors for clear..
1387 ClearScissorSet();
1388 }
1389 // Do clears.
1390 if (clearDepth && clearStencil) {
1391 glClearBufferfi(GL_DEPTH_STENCIL, 0, ref.clearValue.depthStencil.depth,
1392 static_cast<GLint>(ref.clearValue.depthStencil.stencil));
1393 } else if (clearDepth) {
1394 glClearBufferfv(GL_DEPTH, 0, &ref.clearValue.depthStencil.depth);
1395 } else if (clearStencil) {
1396 glClearBufferiv(GL_STENCIL, 0, reinterpret_cast<const GLint*>(&ref.clearValue.depthStencil.stencil));
1397 }
1398
1399 // Restore cached state, if we touched the state.
1400 if ((clearDepth) && (!cacheState_.depthStencilState.enableDepthWrite)) {
1401 // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
1402 glDepthMask(GL_FALSE);
1403 }
1404 if (clearStencil) {
1405 // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
1406 if (cacheState_.depthStencilState.frontStencilOpState.writeMask != allBits) {
1407 glStencilMaskSeparate(GL_FRONT, cacheState_.depthStencilState.frontStencilOpState.writeMask);
1408 }
1409 if (cacheState_.depthStencilState.backStencilOpState.writeMask != allBits) {
1410 glStencilMaskSeparate(GL_BACK, cacheState_.depthStencilState.backStencilOpState.writeMask);
1411 }
1412 }
1413 }
1414
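// Binds the FBO of the given subpass and performs loadOp clears for attachments on their first
// use. Rasterizer discard is temporarily disabled because it would also suppress glClearBuffer*.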
1415 void RenderBackendGLES::DoSubPass(uint32_t subPass)
1416 {
1417 if (currentFrameBuffer_ == nullptr) {
1418 // Completely invalid state in backend.
1419 return;
1420 }
1421 const auto& rpd = activeRenderPass_.renderPassDesc;
1422 const auto& sb = activeRenderPass_.subpasses[subPass];
1423
1424 // If there's no FBO, activate with the swapchain handle so that drawing happens to the correct surface.
1425 if (!currentFrameBuffer_->fbos[subPass].fbo && (sb.colorAttachmentCount == 1U)) {
1426 auto color = rpd.attachmentHandles[sb.colorAttachmentIndices[0]];
1427 device_.Activate(color);
1428 }
1429 device_.BindFrameBuffer(currentFrameBuffer_->fbos[subPass].fbo);
1430 ClearScissorInit(renderArea_);
1431 if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1432 SetState(GL_RASTERIZER_DISCARD, GL_FALSE);
1433 }
1434 {
1435 // NOTE: clear is not yet optimal. depth, stencil and color should be cleared using ONE glClear call if
1436 // possible. (ie. all buffers at once)
1437 renderingToDefaultFbo_ = false;
1438 if (sb.colorAttachmentCount > 0) {
1439 // collect color attachment infos..
1440 const RenderPassDesc::AttachmentDesc*
1441 colorAttachments[PipelineStateConstants::MAX_RENDER_PASS_ATTACHMENT_COUNT];
1442 for (uint32_t ci = 0; ci < sb.colorAttachmentCount; ci++) {
1443 uint32_t index = sb.colorAttachmentIndices[ci];
1444 if (resolveToBackbuffer_[index]) {
1445 // NOTE: this could fail with multiple color attachments....
1446 renderingToDefaultFbo_ = true;
1447 }
1448 if (!attachmentCleared_[index]) {
1449 attachmentCleared_[index] = true;
1450 colorAttachments[ci] = &rpd.attachments[index];
1451 } else {
1452 colorAttachments[ci] = nullptr;
1453 }
1454 }
1455 HandleColorAttachments(array_view(colorAttachments, sb.colorAttachmentCount));
1456 }
1457 if (sb.depthAttachmentCount) {
1458 if (!attachmentCleared_[sb.depthAttachmentIndex]) {
1459 attachmentCleared_[sb.depthAttachmentIndex] = true;
1460 HandleDepthAttachment(rpd.attachments[sb.depthAttachmentIndex]);
1461 }
1462 }
1463 }
1464 if (cacheState_.rasterizationState.enableRasterizerDiscard) { // Rasterizer discard affects glClearBuffer*
1465 // NOTE: We might not need to restore here.. (we need to peek into the command list to find out...)
1466 SetState(GL_RASTERIZER_DISCARD, GL_TRUE);
1467 }
1468 ClearScissorReset();
1469 }
1470
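// Records the first and last subpass in which each attachment is used, and flags attachments
// that resolve to the default framebuffer (backbuffer) so clears and invalidations can be
// scheduled correctly.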
1471 void RenderBackendGLES::ScanPasses(const RenderPassDesc& rpd)
1472 {
1473 for (uint32_t sub = 0; sub < rpd.subpassCount; sub++) {
1474 const auto& currentSubPass = activeRenderPass_.subpasses[sub];
1475 for (uint32_t ci = 0; ci < currentSubPass.resolveAttachmentCount; ci++) {
1476 uint32_t resolveTo = currentSubPass.resolveAttachmentIndices[ci];
1477 if (attachmentFirstUse_[resolveTo] == 0xFFFFFFFF) {
1478 attachmentFirstUse_[resolveTo] = sub;
1479 }
1480 attachmentLastUse_[resolveTo] = sub;
1481 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[resolveTo]->GetPlatformData());
1482 if ((p.image == 0) && (p.renderBuffer == 0)) {
1483 // mark the "resolveFrom" (ie. the colorattachment) as "backbuffer-like", since we resolve to
1484 // backbuffer...
1485 uint32_t resolveFrom = currentSubPass.colorAttachmentIndices[ci];
1486 resolveToBackbuffer_[resolveFrom] = true;
1487 }
1488 }
1489 for (uint32_t ci = 0; ci < currentSubPass.inputAttachmentCount; ci++) {
1490 uint32_t index = currentSubPass.inputAttachmentIndices[ci];
1491 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1492 attachmentFirstUse_[index] = sub;
1493 }
1494 attachmentLastUse_[index] = sub;
1495 }
1496 for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1497 uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1498 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1499 attachmentFirstUse_[index] = sub;
1500 }
1501 attachmentLastUse_[index] = sub;
1502 if (attachmentImage_[index]) {
1503 const auto& p = static_cast<const GpuImagePlatformDataGL&>(attachmentImage_[index]->GetPlatformData());
1504 if ((p.image == 0) && (p.renderBuffer == 0)) {
1505 resolveToBackbuffer_[index] = true;
1506 }
1507 }
1508 }
1509 if (currentSubPass.depthAttachmentCount > 0) {
1510 uint32_t index = currentSubPass.depthAttachmentIndex;
1511 if (attachmentFirstUse_[index] == 0xFFFFFFFF) {
1512 attachmentFirstUse_[index] = sub;
1513 }
1514 attachmentLastUse_[index] = sub;
1515 }
1516 }
1517 }
1518
1519 void RenderBackendGLES::RenderCommandBeginRenderPass(const RenderCommandWithType& ref)
1520 {
1521 PLUGIN_ASSERT(ref.type == RenderCommandType::BEGIN_RENDER_PASS);
1522 const auto& renderCmd = *static_cast<const struct RenderCommandBeginRenderPass*>(ref.rc);
1523 switch (renderCmd.beginType) {
1524 case RenderPassBeginType::RENDER_PASS_BEGIN: {
1525 ++inRenderpass_;
1526 PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES beginrenderpass mInRenderpass %u", inRenderpass_);
1527 activeRenderPass_ = renderCmd; // Store this because we need it later (in NextRenderPass)
1528
1529 const auto& rpd = activeRenderPass_.renderPassDesc;
1530 renderArea_ = rpd.renderArea; // can subpasses have different render areas?
1531 auto& cpm = *(static_cast<NodeContextPoolManagerGLES*>(managers_.poolMgr));
1532 if (multisampledRenderToTexture_) {
1533 cpm.FilterRenderPass(activeRenderPass_);
1534 }
1535 currentFrameBuffer_ = cpm.GetFramebuffer(cpm.GetFramebufferHandle(activeRenderPass_));
1536 if (currentFrameBuffer_ == nullptr) {
1537 // Completely invalid state in backend.
1538 commandListValid_ = false;
1539 --inRenderpass_;
1540 return;
1541 }
1542 PLUGIN_ASSERT_MSG(
1543 activeRenderPass_.subpassStartIndex == 0, "activeRenderPass_.subpassStartIndex != 0 not handled!");
1544 currentSubPass_ = 0;
1545 // find first and last use, clear clearflags. (this could be cached in the low-level classes)
1546 for (uint32_t i = 0; i < rpd.attachmentCount; i++) {
1547 attachmentCleared_[i] = false;
1548 attachmentFirstUse_[i] = 0xFFFFFFFF;
1549 attachmentLastUse_[i] = 0;
1550 resolveToBackbuffer_[i] = false;
1551 attachmentImage_[i] =
1552 static_cast<const GpuImageGLES*>(gpuResourceMgr_.GetImage(rpd.attachmentHandles[i]));
1553 }
1554 ScanPasses(rpd);
1555 DoSubPass(0);
1556 #if (RENDER_PERF_ENABLED == 1)
1557 ++perfCounters_.renderPassCount;
1558 #endif
1559 } break;
1560
1561 case RenderPassBeginType::RENDER_PASS_SUBPASS_BEGIN: {
1562 currentSubPass_ = renderCmd.subpassStartIndex;
1563 PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1564 DoSubPass(activeRenderPass_.subpassStartIndex);
1565 } break;
1566
1567 default:
1568 break;
1569 }
1570 }
1571
1572 void RenderBackendGLES::RenderCommandNextSubpass(const RenderCommandWithType& ref)
1573 {
1574 PLUGIN_ASSERT(ref.type == RenderCommandType::NEXT_SUBPASS);
1575 const auto& renderCmd = *static_cast<const struct RenderCommandNextSubpass*>(ref.rc);
1576 PLUGIN_UNUSED(renderCmd);
1577 PLUGIN_ASSERT(renderCmd.subpassContents == SubpassContents::CORE_SUBPASS_CONTENTS_INLINE);
1578 ++currentSubPass_;
1579 PLUGIN_ASSERT(currentSubPass_ < activeRenderPass_.renderPassDesc.subpassCount);
1580 DoSubPass(currentSubPass_);
1581 }
1582
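// Writes the GL_DEPTH/GL_STENCIL/GL_DEPTH_STENCIL attachment enum into invalidateAttachment when
// this subpass is the last use of the depth attachment and the corresponding store op is
// DONT_CARE. Returns the number of entries written (0 or 1).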
1583 int32_t RenderBackendGLES::InvalidateDepthStencil(
1584 array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1585 {
1586 int32_t depthCount = 0;
1587 if (currentSubPass.depthAttachmentCount == 0) {
1588 return depthCount; // early out
1589 }
1590 const uint32_t index = currentSubPass.depthAttachmentIndex;
1591 if (attachmentLastUse_[index] != currentSubPass_) {
1592 return depthCount; // early out
1593 }
1594 // is last use of the attachment
1595 const auto& image = attachmentImage_[index];
1596 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1597 // NOTE: we expect the depth to be in FBO in this case even if there would be a depth target in render pass
1598 if ((dplat.image || dplat.renderBuffer) && (!renderingToDefaultFbo_)) {
1599 bool depth = false;
1600 bool stencil = false;
1601 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1602 if ((dplat.format == GL_DEPTH_COMPONENT) || (dplat.format == GL_DEPTH_STENCIL)) {
1603 depth = true;
1604 }
1605 }
1606 if (rpd.attachments[index].stencilStoreOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1607 if ((dplat.format == GL_STENCIL) || (dplat.format == GL_DEPTH_STENCIL)) {
1608 stencil = true;
1609 }
1610 }
1611 if (depth && stencil) {
1612 invalidateAttachment[0] = GL_DEPTH_STENCIL_ATTACHMENT;
1613 depthCount++;
1614 } else if (stencil) {
1615 invalidateAttachment[0] = GL_STENCIL_ATTACHMENT;
1616 depthCount++;
1617 } else if (depth) {
1618 invalidateAttachment[0] = GL_DEPTH_ATTACHMENT;
1619 depthCount++;
1620 }
1621 }
1622 return depthCount;
1623 }
1624
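// Collects GL_COLOR_ATTACHMENTi enums whose contents can be discarded after this subpass
// (last use of the attachment and storeOp DONT_CARE). Returns the number of entries written.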
1625 int32_t RenderBackendGLES::InvalidateColor(
1626 array_view<uint32_t> invalidateAttachment, const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1627 {
1628 int32_t colorCount = 0;
1629 // see which parts of the fbo can be invalidated...
1630 // collect color attachment infos..
1631 for (uint32_t ci = 0; ci < currentSubPass.colorAttachmentCount; ci++) {
1632 const uint32_t index = currentSubPass.colorAttachmentIndices[ci];
1633 if (attachmentLastUse_[index] == currentSubPass_) { // is last use of the attachment
1634 if (const auto* image = attachmentImage_[index]) {
1635 const auto& dplat = static_cast<const GpuImagePlatformDataGL&>(image->GetPlatformData());
1636 if (dplat.image || dplat.renderBuffer) {
1637 if (rpd.attachments[index].storeOp == CORE_ATTACHMENT_STORE_OP_DONT_CARE) {
1638 invalidateAttachment[static_cast<size_t>(colorCount)] = GL_COLOR_ATTACHMENT0 + ci;
1639 colorCount++;
1640 }
1641 }
1642 }
1643 }
1644 }
1645 return colorCount;
1646 }
1647
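// Resolves multisampled color/depth into the resolve FBO with glBlitFramebuffer. For multiview
// passes each layer is resolved separately through temporary FBOs. Returns the framebuffer
// target that the following invalidation should be applied to.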
1648 uint32_t RenderBackendGLES::ResolveMSAA(const RenderPassDesc& rpd, const RenderPassSubpassDesc& currentSubPass)
1649 {
1650 const GLbitfield mask = ((currentSubPass.resolveAttachmentCount > 0u) ? GL_COLOR_BUFFER_BIT : 0u) |
1651 ((currentSubPass.depthResolveAttachmentCount > 0u) ? GL_DEPTH_BUFFER_BIT : 0u);
1652 if (!mask) {
1653 return GL_FRAMEBUFFER;
1654 }
1655
1656 if (scissorEnabled_) {
1657 glDisable(GL_SCISSOR_TEST);
1658 scissorEnabled_ = false;
1659 }
1660
1661 // Resolve MSAA buffers.
1662 // NOTE: ARM recommends NOT using glBlitFramebuffer here
1663 if (!currentSubPass.viewMask) {
1664 device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1665 device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1666
1667 glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1668 static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1669 static_cast<GLint>(currentFrameBuffer_->height), mask, GL_NEAREST);
1670 } else {
1671 // Layers need to be resolved one by one. Create temporary FBOs and go through the layers.
1672 GLuint frameBuffers[2U]; // 2 : size
1673 glGenFramebuffers(2, frameBuffers); // 2 : size
1674 device_.BindReadFrameBuffer(frameBuffers[0U]);
1675 device_.BindWriteFrameBuffer(frameBuffers[1U]);
1676
1677 const auto& srcImage =
1678 gpuResourceMgr_.GetImage(rpd.attachmentHandles[currentSubPass.colorAttachmentIndices[0U]]);
1679 const auto& srcPlat = static_cast<const GpuImagePlatformDataGL&>(srcImage->GetBasePlatformData());
1680 const auto& dstImage =
1681 gpuResourceMgr_.GetImage(rpd.attachmentHandles[currentSubPass.resolveAttachmentIndices[0U]]);
1682 const auto& dstPlat = static_cast<const GpuImagePlatformDataGL&>(dstImage->GetBasePlatformData());
1683
1684 auto viewMask = currentSubPass.viewMask;
1685 auto layer = 0;
1686 while (viewMask) {
1687 glFramebufferTextureLayer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcPlat.image, 0, layer);
1688 glFramebufferTextureLayer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstPlat.image, 0, layer);
1689
1690 glBlitFramebuffer(0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1691 static_cast<GLint>(currentFrameBuffer_->height), 0, 0, static_cast<GLint>(currentFrameBuffer_->width),
1692 static_cast<GLint>(currentFrameBuffer_->height), mask, GL_NEAREST);
1693 viewMask >>= 1U;
1694 ++layer;
1695 }
1696 glDeleteFramebuffers(2, frameBuffers); // 2 : buffer size
1697
1698 // invalidation expects to find the actual FBOs
1699 device_.BindReadFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].fbo);
1700 device_.BindWriteFrameBuffer(currentFrameBuffer_->fbos[currentSubPass_].resolve);
1701 }
1702 return GL_READ_FRAMEBUFFER;
1703 }
1704
1705 void RenderBackendGLES::RenderCommandEndRenderPass(const RenderCommandWithType& ref)
1706 {
1707 PLUGIN_ASSERT(ref.type == RenderCommandType::END_RENDER_PASS);
1708 const auto& renderCmd = *static_cast<const struct RenderCommandEndRenderPass*>(ref.rc);
1709 if (renderCmd.endType == RenderPassEndType::END_RENDER_PASS) {
1710 PLUGIN_ASSERT_MSG(inRenderpass_ == 1, "RenderBackendGLES endrenderpass mInRenderpass %u", inRenderpass_);
1711 inRenderpass_--;
1712 }
1713 if (currentFrameBuffer_ == nullptr) {
1714 // Completely invalid state in backend.
1715 return;
1716 }
1717 const auto& rpd = activeRenderPass_.renderPassDesc;
1718 const auto& currentSubPass = activeRenderPass_.subpasses[currentSubPass_];
1719
1720 // Resolve MSAA
1721 const uint32_t fbType = ResolveMSAA(rpd, currentSubPass);
1722
1723 // Finally invalidate color and depth..
1724 GLenum invalidate[PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT + 1] = {};
1725 int32_t invalidateCount = InvalidateColor(invalidate, rpd, currentSubPass);
1726 invalidateCount += InvalidateDepthStencil(
1727 array_view(invalidate + invalidateCount, countof(invalidate) - invalidateCount), rpd, currentSubPass);
1728
1729 // NOTE: all attachments should be the same size AND currentFrameBuffer_->width/height should match that!
1730 Invalidate(fbType, invalidateCount, invalidate, rpd, *currentFrameBuffer_);
1731
1732 if (inRenderpass_ == 0) {
1733 currentFrameBuffer_ = nullptr;
1734 }
1735 }
1736
1737 void RenderBackendGLES::RenderCommandBindVertexBuffers(const RenderCommandWithType& ref)
1738 {
1739 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_VERTEX_BUFFERS);
1740 const auto& renderCmd = *static_cast<const struct RenderCommandBindVertexBuffers*>(ref.rc);
1741 PLUGIN_ASSERT(renderCmd.vertexBufferCount > 0);
1742 PLUGIN_ASSERT(renderCmd.vertexBufferCount <= PipelineStateConstants::MAX_VERTEX_BUFFER_COUNT);
1743 if (!boundGraphicsPipeline_) {
1744 return;
1745 }
1746 vertexAttribBinds_ = renderCmd.vertexBufferCount;
1747 for (size_t i = 0; i < renderCmd.vertexBufferCount; i++) {
1748 const auto& currVb = renderCmd.vertexBuffers[i];
1749 if (const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(currVb.bufferHandle); gpuBuffer) {
1750 const auto& plat = gpuBuffer->GetPlatformData();
1751 uintptr_t offset = currVb.bufferOffset;
1752 offset += plat.currentByteOffset;
1753 vertexAttribBindSlots_[i].id = plat.buffer;
1754 vertexAttribBindSlots_[i].offset = static_cast<intptr_t>(offset);
1755 } else {
1756 vertexAttribBindSlots_[i].id = 0;
1757 vertexAttribBindSlots_[i].offset = 0;
1758 }
1759 }
1760 }
1761
1762 void RenderBackendGLES::RenderCommandBindIndexBuffer(const RenderCommandWithType& ref)
1763 {
1764 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_INDEX_BUFFER);
1765 const auto& renderCmd = *static_cast<const struct RenderCommandBindIndexBuffer*>(ref.rc);
1766 if (const GpuBufferGLES* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.indexBuffer.bufferHandle);
1767 gpuBuffer) {
1768 const auto& plat = gpuBuffer->GetPlatformData();
1769 boundIndexBuffer_.offset = renderCmd.indexBuffer.bufferOffset;
1770 boundIndexBuffer_.offset += plat.currentByteOffset;
1771 boundIndexBuffer_.type = renderCmd.indexBuffer.indexType;
1772 boundIndexBuffer_.id = plat.buffer;
1773 }
1774 }
1775
1776 void RenderBackendGLES::RenderCommandBlitImage(const RenderCommandWithType& ref)
1777 {
1778 PLUGIN_ASSERT(ref.type == RenderCommandType::BLIT_IMAGE);
1779 const auto& renderCmd = *static_cast<const struct RenderCommandBlitImage*>(ref.rc);
1780 const auto* srcImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1781 const auto* dstImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1782 if ((srcImage == nullptr) || (dstImage == nullptr)) {
1783 return;
1784 }
1785 const auto& srcDesc = srcImage->GetDesc();
1786 const auto& srcPlat = srcImage->GetPlatformData();
1787 const auto& dstDesc = dstImage->GetDesc();
1788 const auto& dstPlat = dstImage->GetPlatformData();
1789 const auto& srcRect = renderCmd.imageBlit.srcOffsets;
1790 const auto& dstRect = renderCmd.imageBlit.dstOffsets;
1791 const auto& src = renderCmd.imageBlit.srcSubresource;
1792 const auto& dst = renderCmd.imageBlit.dstSubresource;
1793 const auto srcMipLevel = static_cast<GLint>(src.mipLevel);
1794 const auto dstMipLevel = static_cast<GLint>(dst.mipLevel);
1795 const auto srcSampleCount = static_cast<uint32_t>(srcDesc.sampleCountFlags);
1796 const auto dstSampleCount = static_cast<uint32_t>(dstDesc.sampleCountFlags);
1797 PLUGIN_ASSERT_MSG(src.layerCount == dst.layerCount, "Source and Destination layercounts do not match!");
1798 PLUGIN_ASSERT_MSG(inRenderpass_ == 0, "RenderCommandBlitImage while inRenderPass");
1799 glDisable(GL_SCISSOR_TEST);
1800 scissorEnabled_ = false;
1801 // NOTE: LAYERS! (texture arrays)
1802 device_.BindReadFrameBuffer(blitImageSourceFbo_);
1803 device_.BindWriteFrameBuffer(blitImageDestinationFbo_);
1804 for (uint32_t layer = 0; layer < src.layerCount; layer++) {
1805 const GLenum srcType = GetTarget(srcPlat.type, layer, srcSampleCount);
1806 const GLenum dstType = GetTarget(dstPlat.type, layer, dstSampleCount);
1807 // glFramebufferTextureLayer for array textures....
1808 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, srcPlat.image, srcMipLevel);
1809 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, dstPlat.image, dstMipLevel);
1810 DoBlit(renderCmd.filter, { src.mipLevel, srcRect[0], srcRect[1], srcDesc.height },
1811 { dst.mipLevel, dstRect[0], dstRect[1], dstDesc.height });
1812 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, srcType, 0, 0);
1813 glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, dstType, 0, 0);
1814 }
1815 }
1816
1817 void RenderBackendGLES::RenderCommandCopyBuffer(const RenderCommandWithType& ref)
1818 {
1819 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER);
1820 const auto& renderCmd = *static_cast<const struct RenderCommandCopyBuffer*>(ref.rc);
1821 const auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1822 const auto* dstGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.dstHandle);
1823 if (srcGpuBuffer && dstGpuBuffer) {
1824 const auto& srcData = srcGpuBuffer->GetPlatformData();
1825 const auto& dstData = dstGpuBuffer->GetPlatformData();
1826 const auto oldBindR = device_.BoundBuffer(GL_COPY_READ_BUFFER);
1827 const auto oldBindW = device_.BoundBuffer(GL_COPY_WRITE_BUFFER);
1828 device_.BindBuffer(GL_COPY_READ_BUFFER, srcData.buffer);
1829 device_.BindBuffer(GL_COPY_WRITE_BUFFER, dstData.buffer);
1830 glCopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER,
1831 static_cast<GLintptr>(renderCmd.bufferCopy.srcOffset),
1832 static_cast<GLintptr>(renderCmd.bufferCopy.dstOffset), static_cast<GLsizeiptr>(renderCmd.bufferCopy.size));
1833 device_.BindBuffer(GL_COPY_READ_BUFFER, oldBindR);
1834 device_.BindBuffer(GL_COPY_WRITE_BUFFER, oldBindW);
1835 }
1836 }
1837
1838 void RenderBackendGLES::BufferToImageCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1839 {
1840 #if (RENDER_HAS_GLES_BACKEND == 1) && defined(_WIN32)
1841 // use the workaround only for gles backend on windows. (pvr simulator bug)
1842 constexpr const bool usePixelUnpackBuffer = false;
1843 #else
1844 // expect this to work, and the nvidia bug to be fixed.
1845 constexpr const bool usePixelUnpackBuffer = true;
1846 #endif
1847 auto* srcGpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(renderCmd.srcHandle);
1848 auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1849 if ((srcGpuBuffer == nullptr) || (dstGpuImage == nullptr)) {
1850 return;
1851 }
1852 const auto info = SetupBlit<usePixelUnpackBuffer>(device_, renderCmd.bufferImageCopy, *srcGpuBuffer, *dstGpuImage);
1853 if (info.iPlat.type == GL_TEXTURE_CUBE_MAP) {
1854 BlitCube(device_, info);
1855 } else if (info.iPlat.type == GL_TEXTURE_2D) {
1856 Blit2D(device_, info);
1857 } else if (info.iPlat.type == GL_TEXTURE_2D_ARRAY) {
1858 BlitArray(device_, info);
1859 } else if (info.iPlat.type == GL_TEXTURE_3D) {
1860 Blit3D(device_, info);
1861 #if RENDER_HAS_GLES_BACKEND
1862 } else if (info.iPlat.type == GL_TEXTURE_EXTERNAL_OES) {
1863 PLUGIN_LOG_E("Tried to copy to GL_TEXTURE_EXTERNAL_OES. Ignored!");
1864 #endif
1865 } else {
1866 PLUGIN_ASSERT_MSG(false, "RenderCommandCopyBufferImage unhandled type");
1867 }
1868 FinishBlit<usePixelUnpackBuffer>(device_, *srcGpuBuffer);
1869 }
1870
1871 void RenderBackendGLES::ImageToBufferCopy(const struct RenderCommandCopyBufferImage& renderCmd)
1872 {
1873 const auto& bc = renderCmd.bufferImageCopy;
1874 const auto* srcGpuImage = static_cast<GpuImageGLES*>(gpuResourceMgr_.GetImage(renderCmd.srcHandle));
1875 const auto* dstGpuBuffer = static_cast<GpuBufferGLES*>(gpuResourceMgr_.GetBuffer(renderCmd.dstHandle));
1876 PLUGIN_ASSERT(srcGpuImage);
1877 PLUGIN_ASSERT(dstGpuBuffer);
1878 if ((srcGpuImage == nullptr) || (dstGpuBuffer == nullptr)) {
1879 return;
1880 }
1881 const auto& iPlat = static_cast<const GpuImagePlatformDataGL&>(srcGpuImage->GetPlatformData());
1882 const auto& bPlat = static_cast<const GpuBufferPlatformDataGL&>(dstGpuBuffer->GetPlatformData());
1883 if ((iPlat.type != GL_TEXTURE_CUBE_MAP) && (iPlat.type != GL_TEXTURE_2D)) {
1884 PLUGIN_LOG_E("Unsupported texture type in ImageToBufferCopy %x", iPlat.type);
1885 return;
1886 }
1887 device_.BindReadFrameBuffer(blitImageSourceFbo_);
1888 PLUGIN_ASSERT(bc.imageSubresource.layerCount == 1);
1889 GLenum type = GL_TEXTURE_2D;
1890 if (iPlat.type == GL_TEXTURE_CUBE_MAP) {
1891 type = GetCubeMapTarget(iPlat.type, bc.imageSubresource.baseArrayLayer);
1892 }
1893 // glFramebufferTextureLayer for array textures....
1894 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, static_cast<GLuint>(iPlat.image),
1895 static_cast<GLint>(bc.imageSubresource.mipLevel));
1896 const Math::UVec2 sPos { bc.imageOffset.width, bc.imageOffset.height };
1897 const Math::UVec2 sExt { bc.imageExtent.width, bc.imageExtent.height };
1898 device_.BindBuffer(GL_PIXEL_PACK_BUFFER, bPlat.buffer);
1899 glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(bc.bufferRowLength));
1900 glPixelStorei(GL_PACK_ALIGNMENT, 1);
1901 uintptr_t dstOffset = bc.bufferOffset + bPlat.currentByteOffset;
1902 glReadnPixels(static_cast<GLint>(sPos.x), static_cast<GLint>(sPos.y), static_cast<GLsizei>(sExt.x),
1903 static_cast<GLsizei>(sExt.y), iPlat.format, static_cast<GLenum>(iPlat.dataType),
1904 static_cast<GLsizei>(bPlat.alignedByteSize), reinterpret_cast<void*>(dstOffset));
1905 device_.BindBuffer(GL_PIXEL_PACK_BUFFER, 0);
1906 glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, type, 0, 0);
1907 }
1908
1909 void RenderBackendGLES::RenderCommandCopyBufferImage(const RenderCommandWithType& ref)
1910 {
1911 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_BUFFER_IMAGE);
1912 const auto& renderCmd = *static_cast<const struct RenderCommandCopyBufferImage*>(ref.rc);
1913 PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1914 if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::BUFFER_TO_IMAGE) {
1915 BufferToImageCopy(renderCmd);
1916 } else if (renderCmd.copyType == RenderCommandCopyBufferImage::CopyType::IMAGE_TO_BUFFER) {
1917 ImageToBufferCopy(renderCmd);
1918 }
1919 }
1920
1921 void RenderBackendGLES::RenderCommandCopyImage(const RenderCommandWithType& ref)
1922 {
1923 PLUGIN_ASSERT(ref.type == RenderCommandType::COPY_IMAGE);
1924 const auto& renderCmd = *static_cast<const struct RenderCommandCopyImage*>(ref.rc);
1925 PLUGIN_ASSERT(inRenderpass_ == 0); // this command should never run during renderpass..
1926 const auto* srcGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.srcHandle);
1927 const auto* dstGpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.dstHandle);
1928 if ((srcGpuImage == nullptr) || (dstGpuImage == nullptr)) {
1929 return;
1930 }
1931 const auto& srcDesc = srcGpuImage->GetDesc();
1932 const auto& dstDesc = dstGpuImage->GetDesc();
1933 #if RENDER_VALIDATION_ENABLED
1934 ValidateCopyImage(renderCmd.imageCopy, srcDesc, dstDesc);
1935 #endif
1936 const auto srcMipLevel =
1937 static_cast<GLint>(Math::min(renderCmd.imageCopy.srcSubresource.mipLevel, srcDesc.mipCount - 1));
1938 const auto dstMipLevel =
1939 static_cast<GLint>(Math::min(renderCmd.imageCopy.dstSubresource.mipLevel, dstDesc.mipCount - 1));
1940
1941 auto sOffset = renderCmd.imageCopy.srcOffset;
1942 auto dOffset = renderCmd.imageCopy.dstOffset;
1943 auto size = renderCmd.imageCopy.extent;
1944
1945 // clamp negative offsets to zero and adjust extent and other offset accordingly
1946 ClampOffset(sOffset, dOffset, size);
1947 ClampOffset(dOffset, sOffset, size);
1948
1949 // clamp size to fit src and dst
1950 ClampSize(sOffset, srcDesc, size);
1951 ClampSize(dOffset, dstDesc, size);
1952
1953 const auto& srcPlatData = srcGpuImage->GetPlatformData();
1954 const auto& dstPlatData = dstGpuImage->GetPlatformData();
1955 glCopyImageSubData(srcPlatData.image, srcPlatData.type, srcMipLevel, sOffset.x, sOffset.y, sOffset.z,
1956 dstPlatData.image, dstPlatData.type, dstMipLevel, dOffset.x, dOffset.y, dOffset.z,
1957 static_cast<GLsizei>(size.width), static_cast<GLsizei>(size.height), static_cast<GLsizei>(size.depth));
1958 }
1959
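// Translates barrier points into glMemoryBarrier bitmasks. Barriers where both source and
// destination stages are fragment shading can use glMemoryBarrierByRegion, which supports only a
// subset of the barrier bits.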
1960 void RenderBackendGLES::RenderCommandBarrierPoint(const RenderCommandWithType& ref)
1961 {
1962 PLUGIN_ASSERT(ref.type == RenderCommandType::BARRIER_POINT);
1963 const auto& renderCmd = *static_cast<const struct RenderCommandBarrierPoint*>(ref.rc);
1964 const auto& rbList = *managers_.rbList;
1965 // NOTE: proper flagging of barriers.
1966 const RenderBarrierList::BarrierPointBarriers* barrierPointBarriers =
1967 rbList.GetBarrierPointBarriers(renderCmd.barrierPointIndex);
1968 if (!barrierPointBarriers) {
1969 return; // early out
1970 }
1971 const uint32_t barrierListCount = barrierPointBarriers->barrierListCount;
1972 const auto* nextBarrierList = barrierPointBarriers->firstBarrierList;
1973 GLbitfield barriers = 0;
1974 GLbitfield barriersByRegion = 0;
1975 for (uint32_t barrierListIndex = 0; barrierListIndex < barrierListCount; ++barrierListIndex) {
1976 if (nextBarrierList == nullptr) {
1977 // cannot be null, just a safety
1978 PLUGIN_ASSERT(false);
1979 return;
1980 }
1981 const auto& barrierListRef = *nextBarrierList;
1982 nextBarrierList = barrierListRef.nextBarrierPointBarrierList; // advance to next
1983 const uint32_t barrierCount = barrierListRef.count;
1984
1985 for (uint32_t barrierIdx = 0; barrierIdx < barrierCount; ++barrierIdx) {
1986 const auto& barrier = barrierListRef.commandBarriers[barrierIdx];
1987
1988 // check if written by a previous shader as an attachment or a storage image/buffer
1989 if (barrier.src.accessFlags & (CORE_ACCESS_SHADER_WRITE_BIT | CORE_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
1990 CORE_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT)) {
1991 const auto resourceHandle = barrier.resourceHandle;
1992 const auto handleType = RenderHandleUtil::GetHandleType(resourceHandle);
1993
1994 // barrier by region is between fragment shaders and supports a subset of barriers.
1995 if ((barrier.src.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT) &&
1996 (barrier.dst.pipelineStageFlags & CORE_PIPELINE_STAGE_FRAGMENT_SHADER_BIT)) {
1997 barriersByRegion |= CommonBarrierBits(barrier.dst.accessFlags, handleType);
1998 } else {
1999 // check the barriers shared with ByRegion
2000 barriers |= CommonBarrierBits(barrier.dst.accessFlags, handleType);
2001
2002 // the rest are invalid for ByRegion
2003 if (barrier.dst.accessFlags & CORE_ACCESS_INDIRECT_COMMAND_READ_BIT) {
2004 barriers |= GL_COMMAND_BARRIER_BIT;
2005 }
2006 if (barrier.dst.accessFlags & CORE_ACCESS_INDEX_READ_BIT) {
2007 barriers |= GL_ELEMENT_ARRAY_BARRIER_BIT;
2008 }
2009 if (barrier.dst.accessFlags & CORE_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) {
2010 barriers |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT;
2011 }
2012 // which are the correct accessFlags?
2013 // GL_PIXEL_BUFFER_BARRIER_BIT:
2014 // - buffer objects via the GL_PIXEL_PACK_BUFFER and GL_PIXEL_UNPACK_BUFFER bindings (via
2015 // glReadPixels, glTexSubImage1D, etc.)
2016 // GL_TEXTURE_UPDATE_BARRIER_BIT:
2017 // - texture writes via glTex(Sub)Image*, glCopyTex(Sub)Image*, glCompressedTex(Sub)Image*,
2018 // and reads via glGetTexImage
2019 // GL_BUFFER_UPDATE_BARRIER_BIT:
2020 // - glBufferSubData, glCopyBufferSubData, or glGetBufferSubData, or buffer object memory
2021 // mapped by glMapBuffer or glMapBufferRange
2022 // Do these two cover all memory access, ie. CORE_ACCESS_MEMORY_READ_BIT and
2023 // CORE_ACCESS_MEMORY_WRITE_BIT?
2024 if (barrier.dst.accessFlags & (CORE_ACCESS_TRANSFER_READ_BIT | CORE_ACCESS_TRANSFER_WRITE_BIT |
2025 CORE_ACCESS_HOST_READ_BIT | CORE_ACCESS_HOST_WRITE_BIT)) {
2026 if (handleType == RenderHandleType::GPU_IMAGE) {
2027 barriers |= GL_TEXTURE_UPDATE_BARRIER_BIT;
2028 } else if (handleType == RenderHandleType::GPU_BUFFER) {
2029 barriers |= GL_BUFFER_UPDATE_BARRIER_BIT | GL_PIXEL_BUFFER_BARRIER_BIT;
2030 }
2031 }
2032 // GL_TRANSFORM_FEEDBACK_BARRIER_BIT is not used at the moment
2033 }
2034 }
2035 }
2036 }
2037 if (barriers) {
2038 glMemoryBarrier(barriers);
2039 }
2040 if (barriersByRegion) {
2041 // only for fragment-fragment
2042 glMemoryBarrierByRegion(barriersByRegion);
2043 }
2044 }
2045
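// Descriptor set emulation: the helpers below only update the CPU-side bind cache
// (boundObjects_); the actual GL binds are made later in BindResources().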
2046 Gles::Bind& RenderBackendGLES::SetupBind(const DescriptorSetLayoutBinding& binding, vector<Gles::Bind>& resources)
2047 {
2048 PLUGIN_ASSERT(binding.binding < resources.size());
2049 auto& obj = resources[binding.binding];
2050 PLUGIN_ASSERT(obj.resources.size() == binding.descriptorCount);
2051 PLUGIN_ASSERT(obj.descriptorType == binding.descriptorType);
2052 return obj;
2053 }
2054
2055 void RenderBackendGLES::BindSampler(const BindableSampler& res, Gles::Bind& obj, uint32_t index)
2056 {
2057 const auto* gpuSampler = gpuResourceMgr_.GetSampler<GpuSamplerGLES>(res.handle);
2058 if (gpuSampler) {
2059 const auto& plat = gpuSampler->GetPlatformData();
2060 obj.resources[index].sampler.samplerId = plat.sampler;
2061 } else {
2062 obj.resources[index].sampler.samplerId = 0;
2063 }
2064 }
2065
2066 void RenderBackendGLES::BindImage(
2067 const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
2068 {
2069 const AccessFlags accessFlags = resState.accessFlags;
2070 auto* gpuImage = gpuResourceMgr_.GetImage<GpuImageGLES>(res.handle);
2071 auto& ref = obj.resources[index];
2072 ref.image.image = gpuImage;
2073 const bool read = IS_BIT(accessFlags, CORE_ACCESS_SHADER_READ_BIT);
2074 const bool write = IS_BIT(accessFlags, CORE_ACCESS_SHADER_WRITE_BIT);
2075 if (read && write) {
2076 ref.image.mode = GL_READ_WRITE;
2077 } else if (read) {
2078 ref.image.mode = GL_READ_ONLY;
2079 } else if (write) {
2080 ref.image.mode = GL_WRITE_ONLY;
2081 } else {
2082 // no read and no write?
2083 ref.image.mode = GL_READ_WRITE;
2084 }
2085 ref.image.mipLevel = res.mip;
2086 }
2087
2088 void RenderBackendGLES::BindImageSampler(
2089 const BindableImage& res, const GpuResourceState& resState, Gles::Bind& obj, uint32_t index)
2090 {
2091 BindImage(res, resState, obj, index);
2092 BindSampler(BindableSampler { res.samplerHandle }, obj, index);
2093 }
2094
2095 void RenderBackendGLES::BindBuffer(const BindableBuffer& res, Gles::Bind& obj, uint32_t dynamicOffset, uint32_t index)
2096 {
2097 const auto* gpuBuffer = gpuResourceMgr_.GetBuffer<GpuBufferGLES>(res.handle);
2098 if (gpuBuffer) {
2099 const auto& plat = gpuBuffer->GetPlatformData();
2100 const uint32_t baseOffset = res.byteOffset;
2101 obj.resources[index].buffer.offset = baseOffset + plat.currentByteOffset + dynamicOffset;
2102 obj.resources[index].buffer.size = std::min(plat.bindMemoryByteSize - baseOffset, res.byteSize);
2103 obj.resources[index].buffer.bufferId = plat.buffer;
2104 } else {
2105 obj.resources[index].buffer.offset = 0;
2106 obj.resources[index].buffer.size = 0;
2107 obj.resources[index].buffer.bufferId = 0;
2108 }
2109 }
2110
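// Copies the resources of one descriptor set into the bind cache, resolving dynamic buffer
// offsets and (on GLES) recording which bindings reference GL_TEXTURE_EXTERNAL_OES images.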
2111 void RenderBackendGLES::ProcessBindings(const struct RenderCommandBindDescriptorSets& renderCmd,
2112 const DescriptorSetLayoutBindingResourcesHandler& data, uint32_t set)
2113 {
2114 BindState& bind = boundObjects_[set];
2115 vector<Gles::Bind>& resources = bind.resources;
2116 #if RENDER_HAS_GLES_BACKEND
2117 bind.oesBinds.clear();
2118 #endif
2119 const auto& dynamicOffsets = renderCmd.descriptorSetDynamicOffsets[set];
2120 const auto& buffers = data.buffers;
2121 const auto& images = data.images;
2122 const auto& samplers = data.samplers;
2123 uint32_t currDynamic = 0U;
2124 for (const auto& res : data.bindings) {
2125 if (res.binding.binding >= resources.size()) {
2126 continue;
2127 }
2128 auto& obj = SetupBind(res.binding, resources);
2129 #if RENDER_HAS_GLES_BACKEND
2130 bool hasOes = false;
2131 #endif
2132 const bool hasArrOffset = (res.binding.descriptorCount > 1);
2133 const uint32_t arrayOffset = hasArrOffset ? GetArrayOffset(data, res) : 0;
2134 for (uint32_t index = 0; index < res.binding.descriptorCount; index++) {
2135 const uint32_t resIdx = (index == 0) ? res.resourceIndex : (arrayOffset + index - 1);
2136 [[maybe_unused]] GpuImageGLES* image = nullptr;
2137 switch (res.binding.descriptorType) {
2138 case CORE_DESCRIPTOR_TYPE_SAMPLER: {
2139 const auto& bRes = samplers[resIdx].desc;
2140 BindSampler(bRes.resource, obj, index);
2141 break;
2142 }
2143 case CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2144 case CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2145 case CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
2146 const auto& bRes = images[resIdx].desc;
2147 BindImage(bRes.resource, bRes.state, obj, index);
2148 image = obj.resources[index].image.image;
2149 break;
2150 }
2151 case CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: {
2152 const auto& bRes = images[resIdx].desc;
2153 BindImageSampler(bRes.resource, bRes.state, obj, index);
2154 image = obj.resources[index].image.image;
2155 break;
2156 }
2157 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2158 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
2159 const auto& bRes = buffers[resIdx].desc;
2160 uint32_t dynamicOffset = 0;
2161 if (currDynamic < dynamicOffsets.dynamicOffsetCount) {
2162 dynamicOffset = dynamicOffsets.dynamicOffsets[currDynamic];
2163 currDynamic++;
2164 }
2165 BindBuffer(bRes.resource, obj, dynamicOffset, index);
2166 break;
2167 }
2168 case CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2169 case CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
2170 const auto& bRes = buffers[resIdx].desc;
2171 BindBuffer(bRes.resource, obj, 0, index);
2172 break;
2173 }
2174 case CORE_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2175 case CORE_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2176 case CORE_DESCRIPTOR_TYPE_MAX_ENUM:
2177 default:
2178 PLUGIN_ASSERT_MSG(false, "Unhandled descriptor type");
2179 break;
2180 }
2181 #if RENDER_HAS_GLES_BACKEND
2182 if ((image) && (image->GetPlatformData().type == GL_TEXTURE_EXTERNAL_OES)) {
2183 hasOes = true;
2184 }
2185 #endif
2186 }
2187 #if RENDER_HAS_GLES_BACKEND
2188 if (hasOes) {
2189 bind.oesBinds.push_back(OES_Bind { (uint8_t)set, (uint8_t)res.binding.binding });
2190 }
2191 #endif
2192 }
2193 }
2194
2195 void RenderBackendGLES::RenderCommandBindDescriptorSets(const RenderCommandWithType& ref)
2196 {
2197 PLUGIN_ASSERT(ref.type == RenderCommandType::BIND_DESCRIPTOR_SETS);
2198 if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2199 return;
2200 }
2201 const auto& renderCmd = *static_cast<const struct RenderCommandBindDescriptorSets*>(ref.rc);
2202 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2203
2204 const auto& aNcdsm = *managers_.descriptorSetMgr;
2205 for (uint32_t idx = renderCmd.firstSet; idx < renderCmd.firstSet + renderCmd.setCount; ++idx) {
2206 PLUGIN_ASSERT_MSG(idx < Gles::ResourceLimits::MAX_SETS, "Invalid descriptorset index");
2207 const auto descriptorSetHandle = renderCmd.descriptorSetHandles[idx];
2208 PLUGIN_ASSERT(RenderHandleUtil::IsValid(descriptorSetHandle));
2209 const auto& data = aNcdsm.GetCpuDescriptorSetData(descriptorSetHandle);
2210 boundObjects_[idx].dirty = true; // mark the set as "changed"
2211 ProcessBindings(renderCmd, data, idx);
2212 // (note, nothing actually gets bound yet.. just the bind cache is updated)
2213 }
2214 }
2215
2216 void RenderBackendGLES::SetPushConstant(uint32_t program, const Gles::PushConstantReflection& pc, const void* data)
2217 {
2218 const auto location = static_cast<GLint>(pc.location);
2219 // the consts list has been filtered and cleared of unused uniforms.
2220 PLUGIN_ASSERT(location != Gles::INVALID_LOCATION);
2221 GLint count = Math::max(static_cast<GLint>(pc.arraySize), 1);
2222 switch (pc.type) {
2223 case GL_UNSIGNED_INT: {
2224 glProgramUniform1uiv(program, location, count, static_cast<const GLuint*>(data));
2225 break;
2226 }
2227 case GL_FLOAT: {
2228 glProgramUniform1fv(program, location, count, static_cast<const GLfloat*>(data));
2229 break;
2230 }
2231 case GL_FLOAT_VEC2: {
2232 glProgramUniform2fv(program, location, count, static_cast<const GLfloat*>(data));
2233 break;
2234 }
2235 case GL_FLOAT_VEC4: {
2236 glProgramUniform4fv(program, location, count, static_cast<const GLfloat*>(data));
2237 break;
2238 }
2239 case GL_FLOAT_MAT4: {
2240 glProgramUniformMatrix4fv(program, location, count, false, static_cast<const GLfloat*>(data));
2241 break;
2242 }
2243 case GL_UNSIGNED_INT_VEC4: {
2244 glProgramUniform4uiv(program, location, count, static_cast<const GLuint*>(data));
2245 break;
2246 }
2247 default:
2248 PLUGIN_ASSERT_MSG(false, "Unhandled pushconstant variable type");
2249 }
2250 }
2251
2252 void RenderBackendGLES::SetPushConstants(uint32_t program, const array_view<Gles::PushConstantReflection>& consts)
2253 {
2254 if (boundProgram_.setPushConstants) {
2255 boundProgram_.setPushConstants = false;
2256 const auto& renderCmd = boundProgram_.pushConstants;
2257 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2258 PLUGIN_ASSERT_MSG(renderCmd.pushConstant.byteSize > 0, "PushConstant byteSize is zero!");
2259 PLUGIN_ASSERT_MSG(renderCmd.data, "PushConstant data is nullptr!");
2260 if ((renderCmd.data == nullptr) || (renderCmd.pushConstant.byteSize == 0))
2261 return;
2262 // ASSERT: expecting data is valid
2263 // NOTE: handle rest of the types
2264 for (const auto& pc : consts) {
2265 const size_t offs = pc.offset;
2266 if ((offs + pc.size) > renderCmd.pushConstant.byteSize) {
2267 PLUGIN_LOG_E(
2268 "pushConstant data invalid (data for %s is missing [offset:%zu size:%zu] byteSize of data:%u)",
2269 pc.name.c_str(), pc.offset, pc.size, renderCmd.pushConstant.byteSize);
2270 continue;
2271 }
2272 /*
2273 NOTE: handle the strides....
2274 consts[i].array_stride;
2275 consts[i].matrix_stride; */
2276 SetPushConstant(program, pc, &renderCmd.data[offs]);
2277 }
2278 }
2279 }
2280
2281 void RenderBackendGLES::RenderCommandPushConstant(const RenderCommandWithType& ref)
2282 {
2283 PLUGIN_ASSERT(ref.type == RenderCommandType::PUSH_CONSTANT);
2284 if (!boundComputePipeline_ && !boundGraphicsPipeline_) {
2285 return;
2286 }
2287 const auto& renderCmd = *static_cast<const struct RenderCommandPushConstant*>(ref.rc);
2288 if (renderCmd.pushConstant.byteSize > 0) {
2289 PLUGIN_ASSERT(renderCmd.data);
2290 PLUGIN_ASSERT_MSG(renderCmd.psoHandle == currentPsoHandle_, "psoHandle mismatch");
2291 boundProgram_.setPushConstants = true;
2292 boundProgram_.pushConstants = renderCmd;
2293 }
2294 }
2295
2296 void RenderBackendGLES::RenderCommandClearColorImage(const RenderCommandWithType& ref)
2297 {
2298 PLUGIN_ASSERT(ref.type == RenderCommandType::CLEAR_COLOR_IMAGE);
2299 #if RENDER_HAS_GLES_BACKEND
2300 #if (RENDER_VALIDATION_ENABLED == 1)
2301 PLUGIN_LOG_ONCE_E("RenderBackendGLES::RenderCommandClearColorImage",
2302 "Render command clear color image not support with GLES. One should implement higher level path for "
2303 "clearing.");
2304 #endif
2305 #else
2306 const auto& renderCmd = *static_cast<const struct RenderCommandClearColorImage*>(ref.rc);
2307
2308 const GpuImageGLES* imagePtr = gpuResourceMgr_.GetImage<GpuImageGLES>(renderCmd.handle);
2309 if (imagePtr) {
2310 const GpuImagePlatformDataGL& platImage = imagePtr->GetPlatformData();
2311 // NOTE: mip levels and array layers should be handled separately
2312 for (const auto& subresRef : renderCmd.ranges) {
2313 glClearTexImage(platImage.image, // texture
2314 (int32_t)subresRef.baseMipLevel, // level
2315 platImage.format, // format
2316 platImage.dataType, // type
2317 &renderCmd.color); // data
2318 }
2319 }
2320 #endif
2321 }
2322
2323 // dynamic states
2324 void RenderBackendGLES::RenderCommandDynamicStateViewport(const RenderCommandWithType& ref)
2325 {
2326 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_VIEWPORT);
2327 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateViewport*>(ref.rc);
2328 const ViewportDesc& vd = renderCmd.viewportDesc;
2329 SetViewport(renderArea_, vd);
2330 }
2331
2332 void RenderBackendGLES::RenderCommandDynamicStateScissor(const RenderCommandWithType& ref)
2333 {
2334 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_SCISSOR);
2335 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateScissor*>(ref.rc);
2336 const ScissorDesc& sd = renderCmd.scissorDesc;
2337 SetScissor(renderArea_, sd);
2338 }
2339
2340 void RenderBackendGLES::RenderCommandDynamicStateLineWidth(const RenderCommandWithType& ref)
2341 {
2342 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_LINE_WIDTH);
2343 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateLineWidth*>(ref.rc);
2344 if (renderCmd.lineWidth != cacheState_.rasterizationState.lineWidth) {
2345 cacheState_.rasterizationState.lineWidth = renderCmd.lineWidth;
2346 glLineWidth(renderCmd.lineWidth);
2347 }
2348 }
2349
2350 void RenderBackendGLES::RenderCommandDynamicStateDepthBias(const RenderCommandWithType& ref)
2351 {
2352 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BIAS);
2353 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBias not implemented");
2354 }
2355
2356 void RenderBackendGLES::RenderCommandDynamicStateBlendConstants(const RenderCommandWithType& ref)
2357 {
2358 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_BLEND_CONSTANTS);
2359 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateBlendConstants not implemented");
2360 }
2361
2362 void RenderBackendGLES::RenderCommandDynamicStateDepthBounds(const RenderCommandWithType& ref)
2363 {
2364 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_DEPTH_BOUNDS);
2365 PLUGIN_ASSERT_MSG(false, "RenderCommandDynamicStateDepthBounds not implemented");
2366 }
2367
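// Applies only the stencil state fields selected by frontFlags/backFlags to GL, keeping the
// cached front/back stencil op states in sync.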
2368 void RenderBackendGLES::SetStencilState(const uint32_t frontFlags, const GraphicsState::StencilOpState& front,
2369 const uint32_t backFlags, const GraphicsState::StencilOpState& back)
2370 {
2371 auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2372 auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2373 const uint32_t FUNCMASK =
2374 (StencilSetFlags::SETCOMPAREOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETREFERENCE);
2375 if (frontFlags & StencilSetFlags::SETWRITEMASK) {
2376 cFront.writeMask = front.writeMask;
2377 glStencilMaskSeparate(GL_FRONT, cFront.writeMask);
2378 }
2379 if (frontFlags & FUNCMASK) {
2380 SetStencilCompareOp(cFront, front);
2381 glStencilFuncSeparate(
2382 GL_FRONT, GetCompareOp(cFront.compareOp), static_cast<GLint>(cFront.reference), cFront.compareMask);
2383 }
2384 if (frontFlags & StencilSetFlags::SETOP) {
2385 SetStencilOp(cFront, front);
2386 glStencilOpSeparate(
2387 GL_FRONT, GetStencilOp(cFront.failOp), GetStencilOp(cFront.depthFailOp), GetStencilOp(cFront.passOp));
2388 }
2389 if (backFlags & StencilSetFlags::SETWRITEMASK) {
2390 cBack.writeMask = back.writeMask;
2391 glStencilMaskSeparate(GL_BACK, cBack.writeMask);
2392 }
2393 if (backFlags & FUNCMASK) {
2394 SetStencilCompareOp(cBack, back);
2395 glStencilFuncSeparate(
2396 GL_BACK, GetCompareOp(cBack.compareOp), static_cast<GLint>(cBack.reference), cBack.compareMask);
2397 }
2398 if (backFlags & StencilSetFlags::SETOP) {
2399 SetStencilOp(cBack, back);
2400 glStencilOpSeparate(
2401 GL_BACK, GetStencilOp(cBack.failOp), GetStencilOp(cBack.depthFailOp), GetStencilOp(cBack.passOp));
2402 }
2403 }
2404
2405 void RenderBackendGLES::RenderCommandDynamicStateStencil(const RenderCommandWithType& ref)
2406 {
2407 PLUGIN_ASSERT(ref.type == RenderCommandType::DYNAMIC_STATE_STENCIL);
2408 const auto& renderCmd = *static_cast<const struct RenderCommandDynamicStateStencil*>(ref.rc);
2409 auto& cFront = cacheState_.depthStencilState.frontStencilOpState;
2410 auto& cBack = cacheState_.depthStencilState.backStencilOpState;
2411 uint32_t setFront = 0;
2412 uint32_t setBack = 0;
2413 if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_FRONT_BIT) {
2414 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2415 if (renderCmd.mask != cFront.compareMask) {
2416 cFront.compareMask = renderCmd.mask;
2417 setFront |= StencilSetFlags::SETCOMPAREMASK;
2418 }
2419 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2420 if (renderCmd.mask != cFront.writeMask) {
2421 cFront.writeMask = renderCmd.mask;
2422 setFront |= StencilSetFlags::SETWRITEMASK;
2423 }
2424 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2425 if (renderCmd.mask != cFront.reference) {
2426 cFront.reference = renderCmd.mask;
2427 setFront |= StencilSetFlags::SETREFERENCE;
2428 }
2429 }
2430 }
2431 if (renderCmd.faceMask & StencilFaceFlagBits::CORE_STENCIL_FACE_BACK_BIT) {
2432 if (renderCmd.dynamicState == StencilDynamicState::COMPARE_MASK) {
2433 if (renderCmd.mask != cBack.compareMask) {
2434 cBack.compareMask = renderCmd.mask;
2435 setBack |= StencilSetFlags::SETCOMPAREMASK;
2436 }
2437 } else if (renderCmd.dynamicState == StencilDynamicState::WRITE_MASK) {
2438 if (renderCmd.mask != cBack.writeMask) {
2439 cBack.writeMask = renderCmd.mask;
2440 setBack |= StencilSetFlags::SETWRITEMASK;
2441 }
2442 } else if (renderCmd.dynamicState == StencilDynamicState::REFERENCE) {
2443 if (renderCmd.mask != cBack.reference) {
2444 cBack.reference = renderCmd.mask;
2445 setBack |= StencilSetFlags::SETREFERENCE;
2446 }
2447 }
2448 }
2449 SetStencilState(setFront, cFront, setBack, cBack);
2450 }
2451
2452 void RenderBackendGLES::RenderCommandFragmentShadingRate(const RenderCommandWithType& renderCmd)
2453 {
2454 #if (RENDER_VALIDATION_ENABLED == 1)
2455 PLUGIN_LOG_ONCE_I("gles_RenderCommandFragmentShadingRate",
2456 "RENDER_VALIDATION: Fragment shading rate not available with GL(ES) backend.");
2457 #endif
2458 }
2459
2460 void RenderBackendGLES::RenderCommandExecuteBackendFramePosition(const RenderCommandWithType& renderCmd)
2461 {
2462 PLUGIN_ASSERT_MSG(false, "RenderCommandExecuteBackendFramePosition not implemented");
2463 }
2464
2465 void RenderBackendGLES::RenderCommandWriteTimestamp(const RenderCommandWithType& renderCmd)
2466 {
2467 PLUGIN_ASSERT_MSG(false, "RenderCommandWriteTimestamp not implemented");
2468 }
2469
2470 void RenderBackendGLES::BindVertexInputs(
2471 const VertexInputDeclarationData& decldata, const array_view<const int32_t>& vertexInputs)
2472 {
2473 // update bindings for the VAO.
2474 // process with attribute descriptions to only bind the needed vertex buffers
2475 // NOTE: there may be extra bindings in decldata.bindingDescriptions,
2476 // but we only bind the ones needed for the shader
2477 const uint32_t minBinding = Math::min(vertexAttribBinds_, decldata.attributeDescriptionCount);
2478 for (uint32_t i = 0; i < minBinding; ++i) {
2479 const auto& attributeRef = decldata.attributeDescriptions[i];
2480 const uint32_t location = attributeRef.location;
2481 const uint32_t binding = attributeRef.binding;
2482 // NOTE: we need to bind all the buffers to the correct bindings.
2483 // shader optimized check (vertexInputs, some locations are not in use)
2484 if ((location != ~0u) && (binding != ~0u) && (vertexInputs[location] != Gles::INVALID_LOCATION)) {
2485 const auto& slot = vertexAttribBindSlots_[binding];
2486 const auto& bindingRef = decldata.bindingDescriptions[binding];
2487 PLUGIN_ASSERT(bindingRef.binding == binding);
2488 // buffer bound to slot, and it's used by the shader.
2489 device_.BindVertexBuffer(binding, slot.id, slot.offset, static_cast<intptr_t>(bindingRef.stride));
2490 /*
2491 core/vulkan
2492 bindingRef.vertexInputRate = CORE_VERTEX_INPUT_RATE_VERTEX (0) attribute index advances per vertex
2493 bindingRef.vertexInputRate = CORE_VERTEX_INPUT_RATE_INSTANCE (1) attribute index advances per instance
2494
2495 gl/gles
2496 If divisor is 0, the attributes using the buffer bound to bindingindex advance once per vertex.
2497 If divisor is >0, the attributes advance once per divisor instances of the set(s) of vertices being
2498 rendered.
2499
2500 so we can directly pass the inputRate as VertexBindingDivisor. (ie. advance once per instance)
2501 ie. enum happens to match and can simply cast.
2502 */
2503 static_assert(CORE_VERTEX_INPUT_RATE_VERTEX == 0 && CORE_VERTEX_INPUT_RATE_INSTANCE == 1);
2504 device_.VertexBindingDivisor(binding, static_cast<uint32_t>(bindingRef.vertexInputRate));
2505 }
2506 }
2507 }
2508
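// Selects and binds the program of the currently bound graphics or compute pipeline. For
// graphics it also flushes viewport/scissor state and binds the VAO, vertex inputs and index
// buffer. Applies pending push constants and the "flip" uniform used when rendering to the
// default FBO, and returns the program's resource list for BindResources().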
2509 const BASE_NS::array_view<Binder>* RenderBackendGLES::BindPipeline()
2510 {
2511 const array_view<Binder>* resourceList = nullptr;
2512 const array_view<Gles::PushConstantReflection>* pushConstants = nullptr;
2513 int32_t flipLocation = Gles::INVALID_LOCATION;
2514 uint32_t program = 0;
2515 // Push constants and "fliplocation" uniform (ie. uniform state) should only be updated if changed...
2516 if (currentFrameBuffer_) { // currentFrameBuffer_ is only set if a graphics pipeline is bound..
2517 PLUGIN_ASSERT(boundComputePipeline_ == nullptr);
2518 PLUGIN_ASSERT(boundGraphicsPipeline_);
2519 if (!boundGraphicsPipeline_) {
2520 return resourceList;
2521 }
2522 const auto& pipelineData =
2523 static_cast<const PipelineStateObjectPlatformDataGL&>(boundGraphicsPipeline_->GetPlatformData());
2524 const GpuShaderProgramGLES* shader = pipelineData.graphicsShader;
2525 #if RENDER_HAS_GLES_BACKEND
2526 if (!oesBinds_.empty()) {
2527 // okay, oes vector contains the set/bind to which an OES texture is bounds
2528 // ask for a compatible program from the boundGraphicsPipeline_
2529 shader = boundGraphicsPipeline_->GetOESProgram(oesBinds_);
2530 }
2531 #endif
2532 if (!shader) {
2533 return resourceList;
2534 }
2535 const auto& sd = static_cast<const GpuShaderProgramPlatformDataGL&>(shader->GetPlatformData());
2536 program = sd.program;
2537
2538 FlushViewportScissors();
2539 if (!scissorEnabled_) {
2540 scissorEnabled_ = true;
2541 glEnable(GL_SCISSOR_TEST); // Always enabled
2542 }
2543 #if (RENDER_PERF_ENABLED == 1)
2544 if (device_.BoundProgram() != program) {
2545 ++perfCounters_.bindProgram;
2546 }
2547 #endif
2548 device_.UseProgram(program);
2549 device_.BindVertexArray(pipelineData.vao);
2550 BindVertexInputs(pipelineData.vertexInputDeclaration, array_view<const int32_t>(sd.inputs, countof(sd.inputs)));
2551 device_.BindElementBuffer(boundIndexBuffer_.id);
2552 resourceList = &sd.resourceList;
2553 flipLocation = sd.flipLocation;
2554 pushConstants = &sd.pushConstants;
2555 } else {
2556 PLUGIN_ASSERT(boundGraphicsPipeline_ == nullptr);
2557 PLUGIN_ASSERT(boundComputePipeline_);
2558 if (!boundComputePipeline_) {
2559 return resourceList;
2560 }
2561 const auto& pipelineData =
2562 static_cast<const PipelineStateObjectPlatformDataGL&>(boundComputePipeline_->GetPlatformData());
2563 if (pipelineData.computeShader) {
2564 const auto& sd =
2565 static_cast<const GpuComputeProgramPlatformDataGL&>(pipelineData.computeShader->GetPlatformData());
2566 program = sd.program;
2567 #if (RENDER_PERF_ENABLED == 1)
2568 if (device_.BoundProgram() != program) {
2569 ++perfCounters_.bindProgram;
2570 }
2571 #endif
2572 device_.UseProgram(program);
2573 resourceList = &sd.resourceList;
2574 flipLocation = sd.flipLocation;
2575 pushConstants = &sd.pushConstants;
2576 }
2577 }
2578
2579 if (pushConstants) {
2580 SetPushConstants(program, *pushConstants);
2581 }
2582 if (flipLocation != Gles::INVALID_LOCATION) {
2583 const float flip = (renderingToDefaultFbo_) ? (-1.f) : (1.f);
2584 glProgramUniform1fv(program, flipLocation, 1, &flip);
2585 }
2586 return resourceList;
2587 }
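
// Illustrative sketch (not part of the backend): GLES has no push constants, so small per-draw data and
// the Y-flip factor are written as plain program uniforms, roughly like below. 'program', 'flipLoc',
// 'pushConstantLoc' and 'data' are hypothetical values; only the GL entry points are real.
//
//     const float flip = renderingToDefaultFbo ? -1.f : 1.f;   // same idea as the flipLocation uniform above
//     glProgramUniform1fv(program, flipLoc, 1, &flip);
//     glProgramUniform4fv(program, pushConstantLoc, 1, data);  // one vec4 worth of "push constant" data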

void RenderBackendGLES::BindResources()
{
#if RENDER_HAS_GLES_BACKEND
    // scan all sets here to see if any of them has OES bindings.
    // we don't actually need to rebuild this info every time;
    // the GPU descriptor sets should be "emulated" better (and this information stored along with the other
    // bind cache data there)
    oesBinds_.clear();
    for (const auto& state : boundObjects_) {
        const auto& oes = state.oesBinds;
        if (!oes.empty()) {
            oesBinds_.append(oes.begin(), oes.end());
        }
    }
#endif
    const auto* resourceList = BindPipeline();
    if (!resourceList) {
        return;
    }
    for (const auto& r : *resourceList) {
        PLUGIN_ASSERT(r.set < PipelineLayoutConstants::MAX_DESCRIPTOR_SET_COUNT);
        if (r.bind >= static_cast<uint32_t>(boundObjects_[r.set].resources.size())) {
            continue;
        }
        const auto& res = boundObjects_[r.set].resources[r.bind];
        PLUGIN_ASSERT(res.resources.size() == r.id.size());
        auto resType = res.descriptorType;
        if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) {
            resType = CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
        } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
            resType = CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        }

        // a few helpers for updating perf counters and binding the sampler/texture/buffer
        auto bindSampler = [this](uint32_t textureUnit, uint32_t samplerId) {
#if (RENDER_PERF_ENABLED == 1)
            if (device_.BoundSampler(textureUnit) != samplerId) {
                ++perfCounters_.bindSampler;
            }
#endif
            device_.BindSampler(textureUnit, samplerId);
        };
        auto bindTexture = [this](uint32_t textureUnit, const GpuImagePlatformDataGL& dplat) {
#if (RENDER_PERF_ENABLED == 1)
            if (device_.BoundTexture(textureUnit, dplat.type) != dplat.image) {
                ++perfCounters_.bindTexture;
            }
#endif
            device_.BindTexture(textureUnit, dplat.type, dplat.image);
        };
        auto bindTextureImage = [this](uint32_t textureUnit, const Gles::Bind::ImageType& image,
                                    const GpuImagePlatformDataGL& dplat) {
            const uint32_t level =
                (image.mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? image.mipLevel : 0U;
            device_.BindImageTexture(textureUnit, dplat.image, level, false, 0, image.mode, dplat.internalFormat);
        };
        auto bindBuffer = [this](uint32_t target, uint32_t binding, const Gles::Bind::BufferType& buffer) {
#if (RENDER_PERF_ENABLED == 1)
            if (device_.BoundBuffer(target) != buffer.bufferId) {
                ++perfCounters_.bindBuffer;
            }
#endif
            device_.BindBufferRange(target, binding, buffer.bufferId, buffer.offset, buffer.size);
        };
        auto setMipLevel = [](const uint32_t type, const uint32_t mipLevel) {
            // either force the defined mip level or use the defaults.
            glTexParameteri(type, GL_TEXTURE_BASE_LEVEL,
                static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ? mipLevel : 0U));
            glTexParameteri(type, GL_TEXTURE_MAX_LEVEL,
                static_cast<GLint>((mipLevel != PipelineStateConstants::GPU_IMAGE_ALL_MIP_LEVELS) ?
                    mipLevel : 1000U)); // 1000 is the GL default value of GL_TEXTURE_MAX_LEVEL
        };

#if (RENDER_VALIDATION_ENABLED == 1)
        if (resType != r.type) {
            PLUGIN_LOG_ONCE_E(
                "backend_desc_type_mismatch_gles", "RENDER_VALIDATION: shader / pipeline descriptor type mismatch");
        }
#endif

        for (uint32_t index = 0; index < res.resources.size(); index++) {
            const auto& obj = res.resources[index];
            for (const auto& id : r.id[index]) {
                const auto binding = index + id;
                if (resType == CORE_DESCRIPTOR_TYPE_SAMPLER) {
                    bindSampler(binding, obj.sampler.samplerId);
                } else if ((resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) ||
                           (resType == CORE_DESCRIPTOR_TYPE_SAMPLED_IMAGE) ||
                           (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT)) {
                    if (resType == CORE_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
                        bindSampler(binding, obj.sampler.samplerId);
                    } else if (resType == CORE_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
                        bindSampler(binding, 0U);
                    }
                    if (obj.image.image) {
                        auto& dplat = obj.image.image->GetPlatformData();
                        bindTexture(binding, dplat);

                        // NOTE: the last setting wins; different mip levels cannot be bound from a single
                        // resource.
                        // Check and update (if needed) the forced mip level.
                        if (dplat.mipLevel != obj.image.mipLevel) {
                            // NOTE: we are actually modifying the texture object bound above
                            const_cast<GpuImagePlatformDataGL&>(dplat).mipLevel = obj.image.mipLevel;
                            setMipLevel(dplat.type, dplat.mipLevel);
                        }
                    }
                } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_IMAGE) {
                    if (obj.image.image) {
                        auto& dplat = obj.image.image->GetPlatformData();
                        bindTextureImage(binding, obj.image, dplat);
                    }
                } else if (resType == CORE_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {
                    bindBuffer(GL_UNIFORM_BUFFER, binding, obj.buffer);
                } else if (resType == CORE_DESCRIPTOR_TYPE_STORAGE_BUFFER) {
                    bindBuffer(GL_SHADER_STORAGE_BUFFER, binding, obj.buffer);
                }
            }
        }
    }
    // mark all bound.
    for (auto& b : boundObjects_) {
        b.dirty = false;
    }
}
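
// Illustrative sketch (not part of the backend): without Vulkan-style descriptor sets, each descriptor type
// above maps onto a plain GL binding call at a flattened binding index, roughly:
//
//     glBindSampler(unit, sampler);                                          // CORE_DESCRIPTOR_TYPE_SAMPLER
//     glActiveTexture(GL_TEXTURE0 + unit);
//     glBindTexture(GL_TEXTURE_2D, texture);                                 // sampled / combined image
//     glBindImageTexture(unit, texture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_RGBA8); // storage image
//     glBindBufferRange(GL_UNIFORM_BUFFER, unit, ubo, offset, size);         // uniform buffer
//     glBindBufferRange(GL_SHADER_STORAGE_BUFFER, unit, ssbo, offset, size); // storage buffer
//
// 'unit', 'sampler', 'texture', 'ubo', 'ssbo', 'offset' and 'size' are hypothetical values; the device_
// wrappers above add state caching and perf counting on top of these calls.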

void RenderBackendGLES::RenderCommandBeginDebugMarker(const RenderCommandWithType& ref)
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    const auto& renderCmd = *static_cast<const struct RenderCommandBeginDebugMarker*>(ref.rc);
    glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, static_cast<const GLchar*>(renderCmd.name.data()));
#endif
}

void RenderBackendGLES::RenderCommandEndDebugMarker(const RenderCommandWithType&)
{
#if (RENDER_DEBUG_MARKERS_ENABLED == 1)
    glPopDebugGroup();
#endif
}
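
// Illustrative sketch (not part of the backend): the same KHR_debug group markers wrapped in a small RAII
// helper, a common way to keep Push/Pop calls balanced across early returns.
//
//     struct DebugGroupScope {
//         explicit DebugGroupScope(const char* name)
//         {
//             glPushDebugGroup(GL_DEBUG_SOURCE_APPLICATION, 0, -1, name); // -1: name is null-terminated
//         }
//         ~DebugGroupScope() { glPopDebugGroup(); }
//     };
//
//     // usage: DebugGroupScope scope("MyRenderNode");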

#if (RENDER_PERF_ENABLED == 1)
void RenderBackendGLES::StartFrameTimers(const RenderCommandFrameData& renderCommandFrameData)
{
    framePerfCounters_ = {};
    for (const auto& renderCommandContext : renderCommandFrameData.renderCommandContexts) {
        const string_view& debugName = renderCommandContext.debugName;
        if (timers_.count(debugName) == 0) { // new timers
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
            PerfDataSet& perfDataSet = timers_[debugName];
            constexpr GpuQueryDesc desc { QueryType::CORE_QUERY_TYPE_TIMESTAMP, 0 };
            perfDataSet.gpuHandle = gpuQueryMgr_->Create(debugName, CreateGpuQueryGLES(device_, desc));
            perfDataSet.counter = 0u;
#else
            timers_.insert({ debugName, {} });
#endif
        }
    }
}
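
// Illustrative sketch (not part of the backend): on GLES the per-node GPU timers are ultimately backed by
// EXT_disjoint_timer_query objects; creating and issuing one looks roughly like this (the GpuQuery classes
// used above wrap and double-buffer these).
//
//     GLuint query = 0u;
//     glGenQueriesEXT(1, &query);
//     glQueryCounterEXT(query, GL_TIMESTAMP_EXT); // records a GPU timestamp at this point in the stream
//     // ...later, once the result is available:
//     GLuint64 nanoseconds = 0u;
//     glGetQueryObjectui64vEXT(query, GL_QUERY_RESULT_EXT, &nanoseconds);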

void RenderBackendGLES::EndFrameTimers()
{
    int64_t fullGpuTime = 0;
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    // already in micros
    fullGpuTime = fullGpuCounter_;
    fullGpuCounter_ = 0;
#endif
    if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
            CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RENDER");
        perfData->UpdateData("RenderBackend", "Full_Cpu", commonCpuTimers_.full.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Acquire_Cpu", commonCpuTimers_.acquire.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Execute_Cpu", commonCpuTimers_.execute.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Submit_Cpu", commonCpuTimers_.submit.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Present_Cpu", commonCpuTimers_.present.GetMicroseconds());
        perfData->UpdateData("RenderBackend", "Full_Gpu", fullGpuTime);

        CORE_PROFILER_PLOT("Full_Cpu", static_cast<int64_t>(commonCpuTimers_.full.GetMicroseconds()));
        CORE_PROFILER_PLOT("Acquire_Cpu", static_cast<int64_t>(commonCpuTimers_.acquire.GetMicroseconds()));
        CORE_PROFILER_PLOT("Execute_Cpu", static_cast<int64_t>(commonCpuTimers_.execute.GetMicroseconds()));
        CORE_PROFILER_PLOT("Submit_Cpu", static_cast<int64_t>(commonCpuTimers_.submit.GetMicroseconds()));
        CORE_PROFILER_PLOT("Present_Cpu", static_cast<int64_t>(commonCpuTimers_.present.GetMicroseconds()));
        CORE_PROFILER_PLOT("Full_Gpu", static_cast<int64_t>(fullGpuTime));
    }

    CORE_PROFILER_PLOT("Instance count", static_cast<int64_t>(framePerfCounters_.instanceCount));
    CORE_PROFILER_PLOT("Triangle count", static_cast<int64_t>(framePerfCounters_.triangleCount));
    CORE_PROFILER_PLOT("Draw count", static_cast<int64_t>(framePerfCounters_.drawCount));
    CORE_PROFILER_PLOT("Draw Indirect count", static_cast<int64_t>(framePerfCounters_.drawIndirectCount));
    CORE_PROFILER_PLOT("Dispatch count", static_cast<int64_t>(framePerfCounters_.dispatchCount));
    CORE_PROFILER_PLOT("Dispatch Indirect count", static_cast<int64_t>(framePerfCounters_.dispatchIndirectCount));
    CORE_PROFILER_PLOT("RenderPass count", static_cast<int64_t>(framePerfCounters_.renderPassCount));
    CORE_PROFILER_PLOT("Bind program count", static_cast<int64_t>(framePerfCounters_.bindProgram));
    CORE_PROFILER_PLOT("Bind sampler count", static_cast<int64_t>(framePerfCounters_.bindSampler));
    CORE_PROFILER_PLOT("Bind texture count", static_cast<int64_t>(framePerfCounters_.bindTexture));
    CORE_PROFILER_PLOT("Bind buffer count", static_cast<int64_t>(framePerfCounters_.bindBuffer));
}

void RenderBackendGLES::CopyPerfTimeStamp(const string_view name, PerfDataSet& perfDataSet)
{
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
    int64_t gpuMicroSeconds = 0;
    if (validGpuQueries_) {
        GpuQuery* gpuQuery = gpuQueryMgr_->Get(perfDataSet.gpuHandle);
        PLUGIN_ASSERT(gpuQuery);

        gpuQuery->NextQueryIndex();

        const auto& platData = static_cast<const GpuQueryPlatformDataGLES&>(gpuQuery->GetPlatformData());
        PLUGIN_ASSERT(platData.queryObject);

        GLint disjointOccurred = 0;
#ifdef GL_GPU_DISJOINT_EXT
        // Reading GL_GPU_DISJOINT_EXT also clears the disjoint flag.
        glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjointOccurred);
#endif
        if (!disjointOccurred && (++perfDataSet.counter) > device_.GetCommandBufferingCount()) {
            GLuint64 gpuNanoSeconds = 0U;
#ifdef GL_GPU_DISJOINT_EXT
            glGetQueryObjectui64vEXT(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
#else
            glGetQueryObjectui64v(platData.queryObject, GL_QUERY_RESULT, &gpuNanoSeconds);
#endif
            static constexpr uint64_t NANOSECONDS_TO_MICROSECONDS = 1000u;
            gpuMicroSeconds = static_cast<int64_t>(gpuNanoSeconds / NANOSECONDS_TO_MICROSECONDS);
            if (gpuMicroSeconds > UINT32_MAX) {
                gpuMicroSeconds = 0;
            }
            fullGpuCounter_ += gpuMicroSeconds;
        } else if (disjointOccurred) {
            PLUGIN_LOG_V("GL_GPU_DISJOINT_EXT disjoint occurred.");
        }
    }
#endif
    const int64_t cpuMicroSeconds = perfDataSet.cpuTimer.GetMicroseconds();

    if (CORE_NS::IPerformanceDataManagerFactory* globalPerfData =
            CORE_NS::GetInstance<CORE_NS::IPerformanceDataManagerFactory>(CORE_NS::UID_PERFORMANCE_FACTORY);
        globalPerfData) {
        CORE_NS::IPerformanceDataManager* perfData = globalPerfData->Get("RenderNode");

        perfData->UpdateData(name, "Backend_Cpu", cpuMicroSeconds);
#if (RENDER_GPU_TIMESTAMP_QUERIES_ENABLED == 1)
        perfData->UpdateData(name, "Backend_Gpu", gpuMicroSeconds);
#endif
        perfData->UpdateData(name, "Backend_Count_Triangle", perfCounters_.triangleCount);
        perfData->UpdateData(name, "Backend_Count_InstanceCount", perfCounters_.instanceCount);
        perfData->UpdateData(name, "Backend_Count_Draw", perfCounters_.drawCount);
        perfData->UpdateData(name, "Backend_Count_DrawIndirect", perfCounters_.drawIndirectCount);
        perfData->UpdateData(name, "Backend_Count_Dispatch", perfCounters_.dispatchCount);
        perfData->UpdateData(name, "Backend_Count_DispatchIndirect", perfCounters_.dispatchIndirectCount);
        perfData->UpdateData(name, "Backend_Count_RenderPass", perfCounters_.renderPassCount);
        perfData->UpdateData(name, "Backend_Count_BindProgram", perfCounters_.bindProgram);
        perfData->UpdateData(name, "Backend_Count_BindSample", perfCounters_.bindSampler);
        perfData->UpdateData(name, "Backend_Count_BindTexture", perfCounters_.bindTexture);
        perfData->UpdateData(name, "Backend_Count_BindBuffer", perfCounters_.bindBuffer);
        framePerfCounters_.drawCount += perfCounters_.drawCount;
        framePerfCounters_.drawIndirectCount += perfCounters_.drawIndirectCount;
        framePerfCounters_.dispatchCount += perfCounters_.dispatchCount;
        framePerfCounters_.dispatchIndirectCount += perfCounters_.dispatchIndirectCount;
        framePerfCounters_.renderPassCount += perfCounters_.renderPassCount;
        framePerfCounters_.bindProgram += perfCounters_.bindProgram;
        framePerfCounters_.bindSampler += perfCounters_.bindSampler;
        framePerfCounters_.bindTexture += perfCounters_.bindTexture;
        framePerfCounters_.bindBuffer += perfCounters_.bindBuffer;
        framePerfCounters_.triangleCount += perfCounters_.triangleCount;
        framePerfCounters_.instanceCount += perfCounters_.instanceCount;
    }
}
#endif
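
// Illustrative sketch (not part of the backend): the disjoint-checked readback above boils down to this
// pattern from EXT_disjoint_timer_query; a disjoint event (e.g. a GPU clock change) invalidates the result.
// 'query' is a hypothetical query object.
//
//     GLint disjoint = 0;
//     glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint);           // also clears the flag
//     if (!disjoint) {
//         GLuint64 ns = 0u;
//         glGetQueryObjectui64vEXT(query, GL_QUERY_RESULT, &ns);
//         const int64_t us = static_cast<int64_t>(ns / 1000u); // nanoseconds -> microseconds
//     }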

void RenderBackendGLES::PrimeDepthStencilState(const GraphicsState& graphicsState)
{
    auto& cDepth = cacheState_.depthStencilState;
    cDepth = graphicsState.depthStencilState;
    // CORE_DYNAMIC_STATE_DEPTH_BOUNDS is not supported on GLES (and not implemented on GL either).
    SetState(GL_DEPTH_TEST, cDepth.enableDepthTest);
    SetState(GL_STENCIL_TEST, cDepth.enableStencilTest);
    glDepthFunc(GetCompareOp(cDepth.depthCompareOp));
    glDepthMask((cDepth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE)));
    const uint32_t updateAllFlags =
        (StencilSetFlags::SETOP | StencilSetFlags::SETCOMPAREMASK | StencilSetFlags::SETCOMPAREOP |
            StencilSetFlags::SETREFERENCE | StencilSetFlags::SETWRITEMASK);
    SetStencilState(updateAllFlags, cDepth.frontStencilOpState, updateAllFlags, cDepth.backStencilOpState);
}

void RenderBackendGLES::PrimeBlendState(const GraphicsState& graphicsState)
{
    auto& cBlend = cacheState_.colorBlendState;
    cBlend = graphicsState.colorBlendState;
    glBlendColor(cBlend.colorBlendConstants[Gles::RED_INDEX], cBlend.colorBlendConstants[Gles::GREEN_INDEX],
        cBlend.colorBlendConstants[Gles::BLUE_INDEX], cBlend.colorBlendConstants[Gles::ALPHA_INDEX]);
    GLint maxColorAttachmentsGl = 0;
    glGetIntegerv(GL_MAX_COLOR_ATTACHMENTS, &maxColorAttachmentsGl);
    const GLuint maxColorAttachments = BASE_NS::Math::min(
        PipelineStateConstants::MAX_COLOR_ATTACHMENT_COUNT, static_cast<uint32_t>(maxColorAttachmentsGl));
    for (GLuint i = 0; i < maxColorAttachments; i++) {
        const auto& cBlendState = cBlend.colorAttachments[i];
        glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
            IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
            IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
            IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
        if (cBlendState.enableBlend) {
            glEnablei(GL_BLEND, i);
        } else {
            glDisablei(GL_BLEND, i);
        }
        glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
            GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
            GetBlendFactor(cBlendState.dstAlphaBlendFactor));
        glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
    }
    // logic ops are unsupported on GLES
}

void RenderBackendGLES::PrimeCache(const GraphicsState& graphicsState) // Forces the graphics state..
{
    if (cachePrimed_) {
        return;
    }
    cachePrimed_ = true;
    /// GRAPHICSSTATE inputAssembly
    const auto& ia = graphicsState.inputAssembly;
    auto& cia = cacheState_.inputAssembly;
    cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
    SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
    topology_ = ia.primitiveTopology;
    /// GRAPHICSSTATE rasterizationState
    const auto& rs = graphicsState.rasterizationState;
    auto& crs = cacheState_.rasterizationState;
    // save the polygon mode, since non-fill modes need special handling (possibly shader help for lines...)
    polygonMode_ = rs.polygonMode;
    // GL_DEPTH_CLAMP (rs.enableDepthClamp) is not supported; check GLES 3.2
    crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
    SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
    crs.enableDepthBias = rs.enableDepthBias;
    SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
    crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
    crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
    glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
    // depthBiasClamp is not supported; check GLES 3.2
    // If the cull mode flags change...
    crs.cullModeFlags = rs.cullModeFlags;
    SetCullMode(crs);
    crs.frontFace = rs.frontFace;
    SetFrontFace(crs);
    crs.lineWidth = rs.lineWidth;
    glLineWidth(rs.lineWidth);
    PrimeDepthStencilState(graphicsState);
    PrimeBlendState(graphicsState);
}
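
// Illustrative sketch (not part of the backend): PrimeCache() forces every piece of GL state once so that
// cacheState_ and the actual GL state agree; after that the Update*State() functions only touch GL when a
// value differs from the cache. For a single toggle the pattern is roughly the following ('cached' and
// 'requested' are hypothetical):
//
//     if (requested.enableDepthTest != cached.enableDepthTest) {
//         cached.enableDepthTest = requested.enableDepthTest;
//         if (requested.enableDepthTest) {
//             glEnable(GL_DEPTH_TEST);
//         } else {
//             glDisable(GL_DEPTH_TEST);
//         }
//     }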

void RenderBackendGLES::UpdateDepthState(const GraphicsState& graphicsState)
{
    const auto& depth = graphicsState.depthStencilState;
    auto& cDepth = cacheState_.depthStencilState;
    if (depth.enableDepthTest != cDepth.enableDepthTest) {
        cDepth.enableDepthTest = depth.enableDepthTest;
        SetState(GL_DEPTH_TEST, depth.enableDepthTest);
    }
    if (depth.depthCompareOp != cDepth.depthCompareOp) {
        cDepth.depthCompareOp = depth.depthCompareOp;
        glDepthFunc(GetCompareOp(depth.depthCompareOp));
    }
    if (depth.enableDepthWrite != cDepth.enableDepthWrite) {
        cDepth.enableDepthWrite = depth.enableDepthWrite;
        glDepthMask(depth.enableDepthWrite ? static_cast<GLboolean>(GL_TRUE) : static_cast<GLboolean>(GL_FALSE));
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BOUNDS)) {
        // CORE_DYNAMIC_STATE_DEPTH_BOUNDS is not supported on GLES.
    }
}

void RenderBackendGLES::UpdateStencilState(const GraphicsState& graphicsState)
{
    const auto& depth = graphicsState.depthStencilState;
    auto& cDepth = cacheState_.depthStencilState;
    if (depth.enableStencilTest != cDepth.enableStencilTest) {
        cDepth.enableStencilTest = depth.enableStencilTest;
        SetState(GL_STENCIL_TEST, depth.enableStencilTest);
    }
    uint32_t setFront = 0;
    uint32_t setBack = 0;
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_REFERENCE)) {
        if (cDepth.frontStencilOpState.reference != depth.frontStencilOpState.reference) {
            setFront |= StencilSetFlags::SETREFERENCE;
        }
        if (cDepth.backStencilOpState.reference != depth.backStencilOpState.reference) {
            setBack |= StencilSetFlags::SETREFERENCE;
        }
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
        if (cDepth.frontStencilOpState.compareMask != depth.frontStencilOpState.compareMask) {
            setFront |= StencilSetFlags::SETCOMPAREMASK;
        }
        if (cDepth.backStencilOpState.compareMask != depth.backStencilOpState.compareMask) {
            setBack |= StencilSetFlags::SETCOMPAREMASK;
        }
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
        if (cDepth.frontStencilOpState.writeMask != depth.frontStencilOpState.writeMask) {
            setFront |= StencilSetFlags::SETWRITEMASK;
        }
        if (cDepth.backStencilOpState.writeMask != depth.backStencilOpState.writeMask) {
            setBack |= StencilSetFlags::SETWRITEMASK;
        }
    }
    if (cDepth.frontStencilOpState.compareOp != depth.frontStencilOpState.compareOp) {
        setFront |= StencilSetFlags::SETCOMPAREOP;
    }
    if (cDepth.backStencilOpState.compareOp != depth.backStencilOpState.compareOp) {
        setBack |= StencilSetFlags::SETCOMPAREOP;
    }
    if (!CompareStencilOp(cDepth.frontStencilOpState, depth.frontStencilOpState)) {
        setFront |= StencilSetFlags::SETOP;
    }
    if (!CompareStencilOp(cDepth.backStencilOpState, depth.backStencilOpState)) {
        setBack |= StencilSetFlags::SETOP;
    }
    SetStencilState(setFront, depth.frontStencilOpState, setBack, depth.backStencilOpState);
}
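
// Illustrative sketch (not part of the backend): the accumulated SET* flags are consumed by SetStencilState(),
// which is not shown in this file; per face they presumably reduce to the usual separate stencil calls:
//
//     glStencilFuncSeparate(GL_FRONT, compareOp, reference, compareMask); // SETCOMPAREOP / SETREFERENCE / SETCOMPAREMASK
//     glStencilOpSeparate(GL_FRONT, failOp, depthFailOp, passOp);         // SETOP
//     glStencilMaskSeparate(GL_FRONT, writeMask);                         // SETWRITEMASK
//
// 'compareOp', 'reference', 'compareMask', 'failOp', 'depthFailOp', 'passOp' and 'writeMask' are hypothetical
// locals standing in for the StencilOpState fields (already converted to GL enums).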

void RenderBackendGLES::UpdateDepthStencilState(const GraphicsState& graphicsState)
{
    UpdateDepthState(graphicsState);
    UpdateStencilState(graphicsState);
}

void RenderBackendGLES::UpdateBlendState(const GraphicsState& graphicsState)
{
    const auto& blend = graphicsState.colorBlendState;
    auto& cBlend = cacheState_.colorBlendState;
    for (GLuint i = 0; i < blend.colorAttachmentCount; i++) {
        const auto& blendState = blend.colorAttachments[i];
        auto& cBlendState = cBlend.colorAttachments[i];
        if (blendState.colorWriteMask != cBlendState.colorWriteMask) {
            cBlendState.colorWriteMask = blendState.colorWriteMask;
            glColorMaski(i, IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_R_BIT),
                IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_G_BIT),
                IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_B_BIT),
                IS_BIT_GL(cBlendState.colorWriteMask, CORE_COLOR_COMPONENT_A_BIT));
        }

        // Check if blend state has changed
        bool factorsChanged = false;
        bool opsChanged = false;

        if (blendState.enableBlend) {
            factorsChanged = !CompareBlendFactors(cBlendState, blendState);
            opsChanged = !CompareBlendOps(cBlendState, blendState);
        }

        if (blendState.enableBlend == cBlendState.enableBlend && !factorsChanged && !opsChanged) {
            continue;
        }
        cBlendState.enableBlend = blendState.enableBlend;
        if (blendState.enableBlend) {
            glEnablei(GL_BLEND, i);
            if (factorsChanged) {
                SetBlendFactors(cBlendState, blendState);
                glBlendFuncSeparatei(i, GetBlendFactor(cBlendState.srcColorBlendFactor),
                    GetBlendFactor(cBlendState.dstColorBlendFactor), GetBlendFactor(cBlendState.srcAlphaBlendFactor),
                    GetBlendFactor(cBlendState.dstAlphaBlendFactor));
            }
            if (opsChanged) {
                SetBlendOps(cBlendState, blendState);
                glBlendEquationSeparatei(i, GetBlendOp(cBlendState.colorBlendOp), GetBlendOp(cBlendState.alphaBlendOp));
            }
        } else {
            glDisablei(GL_BLEND, i);
        }
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_BLEND_CONSTANTS)) {
        if (!Compare(cBlend.colorBlendConstants, blend.colorBlendConstants)) {
            Set(cBlend.colorBlendConstants, blend.colorBlendConstants);
            glBlendColor(blend.colorBlendConstants[Gles::RED_INDEX], blend.colorBlendConstants[Gles::GREEN_INDEX],
                blend.colorBlendConstants[Gles::BLUE_INDEX], blend.colorBlendConstants[Gles::ALPHA_INDEX]);
        }
    }
    // logicOps in blend not supported on GLES
}

void RenderBackendGLES::UpdateRasterizationState(const GraphicsState& graphicsState)
{
    const auto& rs = graphicsState.rasterizationState;
    auto& crs = cacheState_.rasterizationState;
    // save the polygon mode, since non-fill modes need special handling (possibly shader help for lines...)
    polygonMode_ = rs.polygonMode;
#if RENDER_HAS_GL_BACKEND
    if (rs.polygonMode != crs.polygonMode) {
        crs.polygonMode = rs.polygonMode;
        SetPolygonMode(rs);
    }
#endif
    if (rs.enableDepthClamp != crs.enableDepthClamp) {
        crs.enableDepthClamp = rs.enableDepthClamp;
        // NOT SUPPORTED (needs an extension)
    }
    if (rs.enableRasterizerDiscard != crs.enableRasterizerDiscard) {
        crs.enableRasterizerDiscard = rs.enableRasterizerDiscard;
        SetState(GL_RASTERIZER_DISCARD, rs.enableRasterizerDiscard);
    }
    if (rs.enableDepthBias != crs.enableDepthBias) {
        crs.enableDepthBias = rs.enableDepthBias;
        SetState(GL_POLYGON_OFFSET_FILL, rs.enableDepthBias);
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_DEPTH_BIAS)) {
        if ((rs.depthBiasConstantFactor != crs.depthBiasConstantFactor) ||
            (rs.depthBiasSlopeFactor != crs.depthBiasSlopeFactor)) {
            crs.depthBiasConstantFactor = rs.depthBiasConstantFactor;
            crs.depthBiasSlopeFactor = rs.depthBiasSlopeFactor;
            glPolygonOffset(rs.depthBiasSlopeFactor, rs.depthBiasConstantFactor);
        }
        // depthBiasClamp NOT SUPPORTED (needs an extension)
    }
    // If the cull mode flags change...
    if (rs.cullModeFlags != crs.cullModeFlags) {
        crs.cullModeFlags = rs.cullModeFlags;
        SetCullMode(crs);
    }
    auto frontFace = rs.frontFace;
    if (!renderingToDefaultFbo_) {
        // Flip the winding when not rendering to the default FBO.
        if (frontFace == FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE) {
            frontFace = FrontFace::CORE_FRONT_FACE_CLOCKWISE;
        } else if (frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE) {
            frontFace = FrontFace::CORE_FRONT_FACE_COUNTER_CLOCKWISE;
        }
    }
    if (frontFace != crs.frontFace) {
        crs.frontFace = frontFace;
        SetFrontFace(crs);
    }
    if (!IS_BIT(dynamicStateFlags_, CORE_DYNAMIC_STATE_LINE_WIDTH)) {
        if (rs.lineWidth != crs.lineWidth) {
            crs.lineWidth = rs.lineWidth;
            glLineWidth(rs.lineWidth);
        }
    }
}
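
// Illustrative sketch (not part of the backend): the (possibly flipped) front face computed above is applied
// by SetFrontFace(), which is not shown in this file and presumably reduces to a plain glFrontFace() call:
//
//     glFrontFace(frontFace == FrontFace::CORE_FRONT_FACE_CLOCKWISE ? GL_CW : GL_CCW);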

void RenderBackendGLES::DoGraphicsState(const GraphicsState& graphicsState)
{
    /// GRAPHICSSTATE inputAssembly
    const auto& ia = graphicsState.inputAssembly;
    if (ia.enablePrimitiveRestart != cacheState_.inputAssembly.enablePrimitiveRestart) {
        auto& cia = cacheState_.inputAssembly;
        cia.enablePrimitiveRestart = ia.enablePrimitiveRestart;
        SetState(GL_PRIMITIVE_RESTART_FIXED_INDEX, ia.enablePrimitiveRestart);
    }
    topology_ = ia.primitiveTopology;
    UpdateRasterizationState(graphicsState);
    UpdateDepthStencilState(graphicsState);
    UpdateBlendState(graphicsState);
}

void RenderBackendGLES::SetViewport(const RenderPassDesc::RenderArea& ra, const ViewportDesc& vd)
{
    // NOTE: ViewportDesc values are floats.
    bool forceV = false;
    bool forceD = false;
    if (!viewportPrimed_) {
        viewportPrimed_ = true;
        forceV = true;
        forceD = true;
    }
    if ((vd.x != viewport_.x) || (vd.y != viewport_.y) || (vd.width != viewport_.width) ||
        (vd.height != viewport_.height)) {
        forceV = true;
    }
    if ((vd.minDepth != viewport_.minDepth) || (vd.maxDepth != viewport_.maxDepth)) {
        forceD = true;
    }

    if (forceV) {
        viewport_.x = vd.x;
        viewport_.y = vd.y;
        viewport_.width = vd.width;
        viewport_.height = vd.height;
        viewportUpdated_ = true;
    }
    if (forceD) {
        viewport_.minDepth = vd.minDepth;
        viewport_.maxDepth = vd.maxDepth;
        viewportDepthRangeUpdated_ = true;
    }
}

void RenderBackendGLES::SetScissor(const RenderPassDesc::RenderArea& ra, const ScissorDesc& sd)
{
    // NOTE: ScissorDesc values are floats.
    bool force = false;
    if (!scissorPrimed_) {
        scissorPrimed_ = true;
        force = true;
    }
    if ((sd.offsetX != scissorBox_.offsetX) || (sd.offsetY != scissorBox_.offsetY) ||
        (sd.extentWidth != scissorBox_.extentWidth) || (sd.extentHeight != scissorBox_.extentHeight)) {
        force = true;
    }
    if (force) {
        scissorBox_ = sd;
        scissorBoxUpdated_ = true;
    }
}

void RenderBackendGLES::FlushViewportScissors()
{
    if (!currentFrameBuffer_) {
        return;
    }
    bool force = false;
    if (scissorViewportSetDefaultFbo_ != renderingToDefaultFbo_) {
        force = true;
        scissorViewportSetDefaultFbo_ = renderingToDefaultFbo_;
    }
    if ((viewportUpdated_) || (force)) {
        viewportUpdated_ = false;
        // Handle top-left / bottom-left origin conversion
        PLUGIN_ASSERT(currentFrameBuffer_);
        auto y = static_cast<GLint>(viewport_.y);
        const auto h = static_cast<GLsizei>(viewport_.height);
        if (renderingToDefaultFbo_) {
            const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
            y = fh - (y + h);
        }
        glViewport(static_cast<GLint>(viewport_.x), y, static_cast<GLsizei>(viewport_.width), h);
    }
    if ((scissorBoxUpdated_) || (force)) {
        scissorBoxUpdated_ = false;
        // Handle top-left / bottom-left origin conversion
        auto y = static_cast<GLint>(scissorBox_.offsetY);
        const auto h = static_cast<GLsizei>(scissorBox_.extentHeight);
        if (renderingToDefaultFbo_) {
            const auto fh = static_cast<GLint>(currentFrameBuffer_->height);
            y = fh - (y + h);
        }
        glScissor(static_cast<GLint>(scissorBox_.offsetX), y, static_cast<GLsizei>(scissorBox_.extentWidth), h);
    }
    if (viewportDepthRangeUpdated_) {
        viewportDepthRangeUpdated_ = false;
        glDepthRangef(viewport_.minDepth, viewport_.maxDepth);
    }
}
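
// Worked example (illustrative, not part of the backend) of the origin conversion above: with a 1080-pixel-high
// default framebuffer and a top-left-origin rectangle of y = 100, height = 300, the bottom-left-origin value
// passed to glViewport()/glScissor() is
//
//     y_gl = framebufferHeight - (y + height) = 1080 - (100 + 300) = 680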
RENDER_END_NAMESPACE()
