1 /* 2 * Copyright (c) 2022 Arm Limited. 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in all 14 * copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 * SOFTWARE. 23 */ 24 #ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP 25 #define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP 26 27 #include "components/Types.h" 28 29 #include <cstdint> 30 #include <cstdlib> 31 #include <vector> 32 #include <set> 33 #include <map> 34 35 namespace arm_compute 36 { 37 /** Forward declaration */ 38 class ITensorInfo; 39 namespace experimental 40 { 41 namespace dynamic_fusion 42 { 43 class IGpuKernelComponent; 44 /** A group of gpu kernel components to be fused together 45 * PRECONDITIONS: 46 * 1. Fusion is limited to a linear sequence of kernel components 47 * INVARIANTS: 48 * @note These preconditions and invariants are exactly the same as fusion constraints for kernel components 49 * 2. Max number of components that can be fused is @ref GpuKernelComponentGroup::max_fused_components ( 50 * excluding any output or input (if any) components. 51 * The max number of output components are bound by the maximum number of dst tensors allowed for a component / component group 52 * ) 53 * 3. The fusion is subject to the pattern: (Complex + Simple * | Simple + Simple * | Un-fusable) + Output? 54 * 4. All components but unfusable, have exactly 1 dst tensor 55 * 5. All fused components share the same @ref IGpuKernelComponent::Properties ( @ref UnitWorkloadStage etc. ) 56 * 6. All fused components share the same tunable parameters like tile size 57 * 7. All fused components share the same dst tensor shape 58 * 8. All fused components' tensors share the same @ref DataLayout 59 * 9. Maximum number of dst tensors allowed for an component (including unfusable) / component group is @ref GpuKernelComponentGroup::max_dst_tensors 60 * This has an impact on the total number of components supported, which = max_fused_components + max_dst_tensors 61 */ 62 class GpuKernelComponentGroup 63 { 64 public: 65 using ComponentPtr = IGpuKernelComponent *; 66 /** Maximum number of components that can be fused into the same component group 67 */ 68 static constexpr size_t max_fused_components = 64; 69 /** Maximum number of dst tensors allowed for a component / component 70 */ 71 static constexpr size_t max_dst_tensors = 8; 72 73 public: 74 /** Default constructor */ 75 GpuKernelComponentGroup() = default; 76 /** Allow instances of this class to be copy constructed */ 77 GpuKernelComponentGroup(const GpuKernelComponentGroup &) = default; 78 /** Allow instances of this class to be copied */ 79 GpuKernelComponentGroup &operator=(const GpuKernelComponentGroup &) = default; 80 /** Allow instances of this class to be move constructed */ 81 GpuKernelComponentGroup(GpuKernelComponentGroup &&) = default; 82 /** Allow instances of this class to be moved */ 83 GpuKernelComponentGroup &operator=(GpuKernelComponentGroup &&) = default; 84 /** Add a component pointer into the group 85 * If the operation fails, then no change is made to the group 86 * 87 * @param[in] component Pointer to the component to be added 88 * 89 * @return true If the operation is successful 90 * @return false If the operation fails 91 */ 92 bool add_component(ComponentPtr component); 93 /** Optimize and pre-compute information about the component group */ 94 void finalize(); 95 /** Get one of the destination tensors of this group */ 96 const ITensorInfo *get_any_dst_tensor() const; 97 /** Get tensor argument of this group 98 * A tensor is an argument if it is a source or destination tensor to the group 99 */ 100 std::vector<const ITensorInfo *> get_argument_tensors() const; 101 /** Get the root (first) component of this group */ 102 ComponentPtr get_root_component() const; 103 /** Check if a @ref ITensorInfo is an "intermediate" tensor of the group 104 * 105 * An intermediate tensor is any tensor that is not an argument. 106 * 107 * @param[in] tensor @ref ITensorInfo to be looked up 108 * 109 * @return true If @p tensor is an intermediate tensor 110 * @return false Otherwise 111 */ 112 bool is_intermediate_tensor(const ITensorInfo *tensor) const; 113 /** Check if an @ref ITensorInfo is an input tensor of the group. 114 * 115 * @param[in] tensor @ref ITensorInfo to be looked up. 116 * 117 * @return true if @p tensor is an input tensor of the group, otherwise false. 118 */ 119 bool is_input_tensor(const ITensorInfo *tensor) const; 120 /** Get the list of temporary tiles that need to be declared */ 121 std::vector<const ITensorInfo *> get_tiles() const; 122 /** Get the shared tile that can be used to store temporary data of the specified tensor. 123 * 124 * @param[in] tensor @ref ITensorInfo to be looked up. 125 * 126 * @return @ref ITensorInfo that is used to store temporary data of @p tensor. 127 **/ 128 const ITensorInfo *get_tile_for_tensor(const ITensorInfo *tensor) const; 129 /** Get the number of components within the group */ 130 size_t size() const; 131 /** Check if the component group is empty */ 132 bool empty() const; 133 ComponentPtr &operator[](size_t index); 134 const ComponentPtr &operator[](size_t index) const; 135 typename std::vector<ComponentPtr>::iterator begin(); 136 typename std::vector<ComponentPtr>::iterator end(); 137 typename std::vector<ComponentPtr>::const_iterator begin() const; 138 typename std::vector<ComponentPtr>::const_iterator end() const; 139 typename std::vector<ComponentPtr>::const_iterator cbegin() const; 140 typename std::vector<ComponentPtr>::const_iterator cend() const; 141 142 private: 143 std::vector<ComponentPtr> _components{}; 144 145 bool _finalized{ false }; 146 147 std::vector<const ITensorInfo *> _argument_tensors{}; 148 std::set<const ITensorInfo *> _input_tensors{}; 149 std::set<const ITensorInfo *> _interm_tensors{}; 150 const ITensorInfo *_any_output_tensor{ nullptr }; 151 std::vector<const ITensorInfo *> _tiles{}; 152 std::map<const ITensorInfo *, const ITensorInfo *> _tile_map{}; 153 }; 154 } // namespace dynamic_fusion 155 } // namespace experimental 156 } // namespace arm_compute 157 #endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP */ 158