/**
 * Copyright 2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_OUTPUT_ACTOR_H_
#define MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_OUTPUT_ACTOR_H_

#include <vector>
#include <string>
#include <memory>
#include <utility>
#include <algorithm>
#include <map>
#include "utils/hash_map.h"
#include "runtime/graph_scheduler/control_node_parser.h"
#include "runtime/graph_scheduler/device_tensor_store.h"
#include "runtime/graph_scheduler/actor/actor_common.h"
#include "runtime/graph_scheduler/actor/abstract_actor.h"
#include "runtime/hardware/device_context.h"
#include "include/backend/anf_runtime_algorithm.h"
#include "include/common/utils/anfalgo.h"
#include "ir/tensor.h"

namespace mindspore {
namespace runtime {
using mindspore::device::DeviceContext;
using mindspore::session::KernelWithIndex;
using mindspore::tensor::TensorPtr;

// The output actor is used to receive the output results of the actors which represent the graph outputs.
class OutputActor : public AbstractActor {
 public:
  OutputActor(const std::string &name, size_t loop_count, size_t outputs_num,
              const std::vector<KernelWithIndex> &summary_nodes)
      : AbstractActor(name, KernelTransformType::kOutputActor, nullptr),
        loop_count_(loop_count),
        current_count_(0),
        summary_nodes_(summary_nodes),
        outputs_num_(outputs_num),
        current_outputs_num_(0) {
    outputs_.resize(outputs_num);
    output_nodes_.resize(outputs_num);
    output_device_tensors_.resize(outputs_num);
    device_contexts_.resize(outputs_num);
  }
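  // A hypothetical construction sketch ("kOutputActor" and "graph_output_pairs" are illustrative
  // names, not taken from this file):
  //   auto output_actor = std::make_shared<OutputActor>("kOutputActor", /*loop_count=*/1,
  //                                                     /*outputs_num=*/graph_output_pairs.size(), summary_nodes);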
  ~OutputActor() override = default;

  // The output actor collects the loop count when receiving the input control from the loop count actor.
  void RunOpControl(AID *const input_control, OpContext<DeviceTensor> *const context) override;

  // The output actor collects the output results when receiving the data from the actors.
  void RunOpData(OpData<DeviceTensor> *const input_data, OpContext<DeviceTensor> *const context) override;

  // The graph outputs need to be assigned new device addresses every step or loop, to avoid the device
  // addresses of the tensors being overwritten in the next step or loop.
  void UpdateOutputDeviceAddress();

  // The summary node keeps its inputs, so if the size of an input (except parameters and weights) changes
  // under dynamic shape, the input device address would be reused in a wrong way. Therefore the summary
  // node inputs should be freed after usage.
  void FreeSummaryNodeMem();

  // Get the members.
  size_t loop_count() const { return loop_count_; }
  size_t outputs_num() const { return outputs_num_; }
  const std::vector<TensorPtr> &outputs() const { return outputs_; }
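
  // A minimal usage sketch, assuming the last loop has finished ("output_actor" is illustrative):
  //   for (const auto &result : output_actor->outputs()) { /* consume the result tensor */ }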

 protected:
  void Init() override;

 private:
  friend class GraphScheduler;
  friend class ControlNodeScheduler;

  TensorPtr CreateOutputTensor(const AnfNodePtr &output_node, size_t output_index, size_t output_position,
                               OpContext<DeviceTensor> *const context);

  // The output device memory will be taken over by the tensor in the last loop; otherwise the memory
  // needs to be freed, in order to:
  // 1. Avoid the memory leak when the memory is used by dynamic ref count in the control flow scene.
  // 2. Allocate the new memory in the next step using the new shape size in the dynamic shape scene.
  void FreeOutputNodeMem();

  // Clear the cached output nodes and tensors.
  void ClearOutputCache();

  // The loop count is constant; the current count is increased after each step finishes running.
  // The output results are collected in the last loop, which is indicated by "loop_count_ - current_count_ == 1".
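  // For example, with loop_count_ == 3, the outputs are collected when current_count_ == 2,
  // since 3 - 2 == 1.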
  size_t loop_count_;
  size_t current_count_;

  // The outputs.
  std::vector<KernelWithIndex> summary_nodes_;
  std::vector<TensorPtr> outputs_;
  std::vector<KernelWithIndex> output_nodes_;
  std::vector<DeviceTensor *> output_device_tensors_;
  size_t outputs_num_;
  size_t current_outputs_num_;

  std::map<KernelWithIndex, DeviceTensorPtr> output_node_to_tensor_device_address_;
};

using OutputActorPtr = std::shared_ptr<OutputActor>;
}  // namespace runtime
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_RUNTIME_FRAMEWORK_ACTOR_OUTPUT_ACTOR_H_