1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_NODE_SHADER_H_ 17 #define TENSORFLOW_LITE_DELEGATES_GPU_GL_NODE_SHADER_H_ 18 19 #include <array> 20 #include <cstdint> 21 #include <memory> 22 #include <string> 23 #include <vector> 24 25 #include "absl/types/any.h" 26 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h" 27 #include "tensorflow/lite/delegates/gpu/common/model.h" 28 #include "tensorflow/lite/delegates/gpu/common/status.h" 29 #include "tensorflow/lite/delegates/gpu/common/types.h" 30 #include "tensorflow/lite/delegates/gpu/gl/compiler_options.h" 31 #include "tensorflow/lite/delegates/gpu/gl/object.h" 32 #include "tensorflow/lite/delegates/gpu/gl/variable.h" 33 34 namespace tflite { 35 namespace gpu { 36 namespace gl { 37 38 enum class IOStructure { 39 // Source code uses standard inputs or outputs that should be generated from 40 // node inputs/outputs. Compiler will generate them automatically as 41 // 'input_data_N'/'output_data_N', where N is an index of the input/output. 42 // 43 // Generated code should not return input objects. 44 ONLY_DEFINITIONS, 45 46 // For inputs: 47 // Source code runs computations using 'vec4 value_N' declared by the 48 // compiler, where N is an index of the input. Each value comes from inputs 49 // using coordinates set by GlobalInvocationID and a dispatch method, 50 // therefore, source code should not explicitly read values. 51 // 52 // For outputs: 53 // Source code runs computations and leaves results in 'vec4 value_N' 54 // declared by the compiler, where N is an index of the output. Value will 55 // be written to the output using coordinates set by GlobalInvocationID and 56 // a dispatch method. Therefore, source code should not explicitly write 57 // results. 58 AUTO, 59 }; 60 61 struct GeneratedCode { 62 // A list of parameters to be set as uniform or hardcoded in a shader. 63 std::vector<Variable> parameters; 64 65 // A list of objects to bind before shader could be executed. 66 std::vector<std::pair<std::string, Object>> objects; 67 68 // A list of shared variables in the shader program. 69 std::vector<Variable> shared_variables; 70 71 // Compute shader operate on an abstract concept of work groups, each 72 // three-dimensional. The number of work groups to be executed is defined by 73 // workload tuple. Therefore, 74 // workload[x,y,z] := workgroup_size[x,y,z] X workgroup_count[x,y,z] 75 // where 'X' is element-wise multiplication. 76 // 77 // Zero workload is calculated as PHWC4 based on output tensor. 78 uint3 workload; 79 80 // operation may specify recommended workgroup size. If not set, runtime will 81 // figure it out automatically. 82 uint3 workgroup; 83 84 std::string source_code; 85 86 // Parameters below reveal additional information about source_code. 87 88 IOStructure input; 89 IOStructure output; 90 }; 91 92 // A class handles shader generation and setting runtime shader parameters. 93 class NodeShader { 94 public: 95 virtual ~NodeShader() = default; 96 97 // A context for generating a code. 98 struct GenerationContext { 99 const GpuInfo* gpu_info; 100 CompilationOptions compiler_options; 101 102 // Information extracted & copied from compiled graph. 103 const std::string& op_type; 104 const absl::any& op_attr; 105 // Do NOT use StrongShape<Layout::BHWC> in preparation for 106 // RankedTensorType::getShape() which returns ArrayRef<int64_t>. 107 std::vector<std::array<int64_t, 4>> input_shapes; 108 std::vector<std::array<int64_t, 4>> output_shapes; 109 }; 110 111 // Generates shader code for a node. The code should be just a function body. 112 virtual absl::Status GenerateCode(const GenerationContext& ctx, 113 GeneratedCode* generated_code) const = 0; 114 115 // Limit the size of the const offsets array 116 static constexpr int kMaxConstArraySize = 9; 117 }; 118 119 } // namespace gl 120 } // namespace gpu 121 } // namespace tflite 122 123 #endif // TENSORFLOW_LITE_DELEGATES_GPU_GL_NODE_SHADER_H_ 124