1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #include "tensorflow/lite/delegates/gpu/gl/gl_sync.h"
17
18 #ifdef __ARM_ACLE
19 #include <arm_acle.h>
20 #endif // __ARM_ACLE
21
22 #include "tensorflow/lite/delegates/gpu/gl/gl_errors.h"
23
24 namespace tflite {
25 namespace gpu {
26 namespace gl {
27
GlSyncWait()28 absl::Status GlSyncWait() {
29 GlSync sync;
30 RETURN_IF_ERROR(GlSync::NewSync(&sync));
31 // Flush sync and loop afterwards without it.
32 GLenum status = glClientWaitSync(sync.sync(), GL_SYNC_FLUSH_COMMANDS_BIT,
33 /* timeout ns = */ 0);
34 while (true) {
35 switch (status) {
36 case GL_TIMEOUT_EXPIRED:
37 break;
38 case GL_CONDITION_SATISFIED:
39 case GL_ALREADY_SIGNALED:
40 return absl::OkStatus();
41 case GL_WAIT_FAILED:
42 return GetOpenGlErrors();
43 }
44 status = glClientWaitSync(sync.sync(), 0, /* timeout ns = */ 10000000);
45 }
46 return absl::OkStatus();
47 }
48
GlActiveSyncWait()49 absl::Status GlActiveSyncWait() {
50 GlSync sync;
51 RETURN_IF_ERROR(GlSync::NewSync(&sync));
52 // Since creating a Sync object is itself a GL command it *must* be flushed.
53 // Otherwise glGetSynciv may never succeed. Perform a flush with
54 // glClientWaitSync call.
55 GLenum status = glClientWaitSync(sync.sync(), GL_SYNC_FLUSH_COMMANDS_BIT,
56 /* timeout ns = */ 0);
57 switch (status) {
58 case GL_TIMEOUT_EXPIRED:
59 break;
60 case GL_CONDITION_SATISFIED:
61 case GL_ALREADY_SIGNALED:
62 return absl::OkStatus();
63 case GL_WAIT_FAILED:
64 return GetOpenGlErrors();
65 }
66
67 // Start active loop.
68 GLint result = GL_UNSIGNALED;
69 while (true) {
70 glGetSynciv(sync.sync(), GL_SYNC_STATUS, sizeof(GLint), nullptr, &result);
71 if (result == GL_SIGNALED) {
72 return absl::OkStatus();
73 }
74 #ifdef __ARM_ACLE
75 // Try to save CPU power by yielding CPU to another thread.
76 __yield();
77 #endif
78 }
79 }
80
NewSync(GlShaderSync * gl_sync)81 absl::Status GlShaderSync::NewSync(GlShaderSync* gl_sync) {
82 GlShaderSync sync;
83 RETURN_IF_ERROR(CreatePersistentBuffer(sizeof(int), &sync.flag_buffer_));
84 static const std::string* kCode = new std::string(R"(#version 310 es
85 layout(local_size_x = 1, local_size_y = 1) in;
86 layout(std430) buffer;
87 layout(binding = 0) buffer Output {
88 int elements[];
89 } output_data;
90 void main() {
91 output_data.elements[0] = 1;
92 })");
93 GlShader shader;
94 RETURN_IF_ERROR(GlShader::CompileShader(GL_COMPUTE_SHADER, *kCode, &shader));
95 RETURN_IF_ERROR(GlProgram::CreateWithShader(shader, &sync.flag_program_));
96 *gl_sync = std::move(sync);
97 return absl::OkStatus();
98 }
99
100 // How it works: GPU writes a buffer and CPU checks the buffer value to be
101 // changed. The buffer is accessible for writing by GPU and reading by CPU
102 // simultaneously - persistent buffer or buffer across shild context can be used
103 // for that.
Wait()104 absl::Status GlShaderSync::Wait() {
105 if (!flag_buffer_.is_valid()) {
106 return absl::UnavailableError("GlShaderSync is not initialized.");
107 }
108 RETURN_IF_ERROR(flag_buffer_.BindToIndex(0));
109 volatile int* flag_ptr_ = reinterpret_cast<int*>(flag_buffer_.data());
110 *flag_ptr_ = 0;
111 RETURN_IF_ERROR(flag_program_.Dispatch({1, 1, 1}));
112 // glFlush must be called to upload GPU task. Adreno won't start executing
113 // the task without glFlush.
114 glFlush();
115 // Wait for the value is being updated by the shader.
116 while (*flag_ptr_ != 1) {
117 }
118 return absl::OkStatus();
119 }
120
121 } // namespace gl
122 } // namespace gpu
123 } // namespace tflite
124