• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2022 Arm Limited.
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to
8  * deal in the Software without restriction, including without limitation the
9  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10  * sell copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all
14  * copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 #ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP
25 #define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP
26 
27 #include "components/Types.h"
28 
29 #include <cstdint>
30 #include <cstdlib>
31 #include <vector>
32 #include <set>
33 #include <map>
34 
35 namespace arm_compute
36 {
37 /** Forward declaration */
38 class ITensorInfo;
39 namespace experimental
40 {
41 namespace dynamic_fusion
42 {
43 class IGpuKernelComponent;
44 /** A group of gpu kernel components to be fused together
45  * PRECONDITIONS:
46  * 1. Fusion is limited to a linear sequence of kernel components
47  * INVARIANTS:
48  * @note These preconditions and invariants are exactly the same as fusion constraints for kernel components
49  * 2. Max number of components that can be fused is @ref GpuKernelComponentGroup::max_fused_components (
50  *        excluding any output or input (if any) components.
51  *        The max number of output components is bounded by the maximum number of dst tensors allowed for a component / component group
52  *    )
53  * 3. The fusion is subject to the pattern: (Complex + Simple * | Simple + Simple * | Un-fusable) + Output?
54  * 4. All components except unfusable ones have exactly 1 dst tensor
55  * 5. All fused components share the same @ref IGpuKernelComponent::Properties ( @ref UnitWorkloadStage etc. )
56  * 6. All fused components share the same tunable parameters like tile size
57  * 7. All fused components share the same dst tensor shape
58  * 8. All fused components' tensors share the same @ref DataLayout
59  * 9. Maximum number of dst tensors allowed for a component (including unfusable) / component group is @ref GpuKernelComponentGroup::max_dst_tensors
60  *      This has an impact on the total number of components supported, which = max_fused_components + max_dst_tensors
61  */
62 class GpuKernelComponentGroup
63 {
64 public:
65     using ComponentPtr = IGpuKernelComponent *; /**< Raw pointer to a component. NOTE(review): the class declares no destructor, so the group does not appear to own the pointees - confirm lifetime with callers */
66     /** Maximum number of components that can be fused into the same component group
67      */
68     static constexpr size_t max_fused_components = 64;
69     /** Maximum number of dst tensors allowed for a component / component group
70      */
71     static constexpr size_t max_dst_tensors = 8;
72 
73 public:
74     /** Default constructor */
75     GpuKernelComponentGroup() = default;
76     /** Allow instances of this class to be copy constructed */
77     GpuKernelComponentGroup(const GpuKernelComponentGroup &) = default;
78     /** Allow instances of this class to be copied */
79     GpuKernelComponentGroup &operator=(const GpuKernelComponentGroup &) = default;
80     /** Allow instances of this class to be move constructed */
81     GpuKernelComponentGroup(GpuKernelComponentGroup &&) = default;
82     /** Allow instances of this class to be moved */
83     GpuKernelComponentGroup &operator=(GpuKernelComponentGroup &&) = default;
84     /** Add a component pointer into the group
85      * If the operation fails, then no change is made to the group
86      *
87      * @param[in] component Pointer to the component to be added
88      *
89      * @return true      If the operation is successful
90      * @return false     If the operation fails
91      */
92     bool add_component(ComponentPtr component);
93     /** Optimize and pre-compute information about the component group */
94     void finalize();
95     /** Get one of the destination tensors of this group */
96     const ITensorInfo *get_any_dst_tensor() const;
97     /** Get the tensor arguments of this group
98      *  A tensor is an argument if it is a source or destination tensor to the group
99      */
100     std::vector<const ITensorInfo *> get_argument_tensors() const;
101     /** Get the root (first) component of this group */
102     ComponentPtr get_root_component() const;
103     /** Check if a @ref ITensorInfo is an "intermediate" tensor of the group
104      *
105      * An intermediate tensor is any tensor that is not an argument.
106      *
107      * @param[in] tensor @ref ITensorInfo to be looked up
108      *
109      * @return true  If @p tensor is an intermediate tensor
110      * @return false  Otherwise
111      */
112     bool is_intermediate_tensor(const ITensorInfo *tensor) const;
113     /** Check if an @ref ITensorInfo is an input tensor of the group.
114      *
115      * @param[in] tensor @ref ITensorInfo to be looked up.
116      *
117      * @return true if @p tensor is an input tensor of the group, otherwise false.
118      */
119     bool is_input_tensor(const ITensorInfo *tensor) const;
120     /** Get the list of temporary tiles that need to be declared */
121     std::vector<const ITensorInfo *> get_tiles() const;
122     /** Get the shared tile that can be used to store temporary data of the specified tensor.
123      *
124      * @param[in] tensor @ref ITensorInfo to be looked up.
125      *
126      * @return @ref ITensorInfo that is used to store temporary data of @p tensor.
127      **/
128     const ITensorInfo *get_tile_for_tensor(const ITensorInfo *tensor) const;
129     /** Get the number of components within the group */
130     size_t size() const;
131     /** Check if the component group is empty */
132     bool empty() const;
133     ComponentPtr &operator[](size_t index); /**< Mutable access to the component pointer at @p index. NOTE(review): bounds checking, if any, is not visible from this declaration */
134     const ComponentPtr &operator[](size_t index) const; /**< Read-only access to the component pointer at @p index. NOTE(review): bounds checking, if any, is not visible from this declaration */
135     typename std::vector<ComponentPtr>::iterator       begin(); /**< Begin iterator (mutable); presumably over the stored components - confirm in the implementation */
136     typename std::vector<ComponentPtr>::iterator       end(); /**< End iterator (mutable); presumably over the stored components - confirm in the implementation */
137     typename std::vector<ComponentPtr>::const_iterator begin() const; /**< Begin iterator (read-only); presumably over the stored components - confirm in the implementation */
138     typename std::vector<ComponentPtr>::const_iterator end() const; /**< End iterator (read-only); presumably over the stored components - confirm in the implementation */
139     typename std::vector<ComponentPtr>::const_iterator cbegin() const; /**< Begin iterator (read-only); presumably over the stored components - confirm in the implementation */
140     typename std::vector<ComponentPtr>::const_iterator cend() const; /**< End iterator (read-only); presumably over the stored components - confirm in the implementation */
141 
142 private:
143     std::vector<ComponentPtr> _components{}; /**< Component pointers held by the group; starts empty */
144 
145     bool _finalized{ false }; /**< Initially false. Presumably set by finalize() - confirm in the implementation */
146 
147     std::vector<const ITensorInfo *> _argument_tensors{}; /**< Presumably backs get_argument_tensors() - confirm in the implementation */
148     std::set<const ITensorInfo *> _input_tensors{}; /**< Presumably backs is_input_tensor() - confirm in the implementation */
149     std::set<const ITensorInfo *> _interm_tensors{}; /**< Presumably backs is_intermediate_tensor() - confirm in the implementation */
150     const ITensorInfo *_any_output_tensor{ nullptr }; /**< Initially nullptr. Presumably backs get_any_dst_tensor() - confirm in the implementation */
151     std::vector<const ITensorInfo *> _tiles{}; /**< Presumably backs get_tiles() - confirm in the implementation */
152     std::map<const ITensorInfo *, const ITensorInfo *> _tile_map{}; /**< Presumably maps a tensor to its shared tile for get_tile_for_tensor() - confirm in the implementation */
153 };
154 } // namespace dynamic_fusion
155 } // namespace experimental
156 } // namespace arm_compute
157 #endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP */
158