1 /** 2 * Copyright 2023 Huawei Technologies Co., Ltd 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_OP_RUNNER_H_ 18 #define MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_OP_RUNNER_H_ 19 20 #include <functional> 21 #include <memory> 22 #include <string> 23 #include <utility> 24 #include <vector> 25 #include "ir/scalar.h" 26 #include "utils/log_adapter.h" 27 #include "utils/ms_utils.h" 28 #include "ir/tensor.h" 29 #include "include/backend/visible.h" 30 #include "abstract/ops/primitive_infer_map.h" 31 #include "kernel/pyboost/pyboost_utils.h" 32 #include "ops/ops_func_impl/simple_infer.h" 33 #include "include/backend/mem_reuse/mem_tracker.h" 34 35 namespace mindspore { 36 namespace tensor { 37 using BaseTensorPtr = tensor::BaseTensorPtr; 38 } 39 namespace kernel { 40 namespace pyboost { 41 using BaseTensorPtr = tensor::BaseTensorPtr; 42 // OpRunner is a base class for operators. 43 // OpRunner records the operator's input abstract, 44 // output abstract and output Tensors for grad, 45 // and it also contains several functional methods for the operator to run. 46 class BACKEND_EXPORT OpRunner : public std::enable_shared_from_this<OpRunner> { 47 public: OpRunner(PrimitivePtr primitive,const DeviceContext * device_context)48 OpRunner(PrimitivePtr primitive, const DeviceContext *device_context) 49 : primitive_(std::move(primitive)), device_context_(device_context) {} 50 virtual ~OpRunner() = default; 51 52 // For users to implement custom call functions in the "customize" directory. get_op()53 std::shared_ptr<OpRunner> get_op() { return shared_from_this(); } 54 55 // set and get methods for class member variables. set_primitive(const PrimitivePtr & primitive)56 void set_primitive(const PrimitivePtr &primitive) { primitive_ = primitive; } primitive()57 const PrimitivePtr &primitive() const { return primitive_; } input_abs()58 const std::vector<AbstractBasePtr> &input_abs() const { return input_abs_; } set_input_abs(const std::vector<AbstractBasePtr> & input_abs)59 void set_input_abs(const std::vector<AbstractBasePtr> &input_abs) { input_abs_ = input_abs; } output_abs()60 const AbstractBasePtr &output_abs() const { return output_abs_; } set_output_abs(const AbstractBasePtr & output_abs)61 void set_output_abs(const AbstractBasePtr &output_abs) { output_abs_ = output_abs; } device_context()62 const DeviceContext *device_context() const { return device_context_; } device_sync_promises()63 const std::vector<pynative::DeviceAddressPromisePtr> &device_sync_promises() const { return device_sync_promises_; } outputs()64 const std::vector<tensor::BaseTensorPtr> &outputs() const { return outputs_; } set_outputs(const std::vector<tensor::BaseTensorPtr> & outputs)65 void set_outputs(const std::vector<tensor::BaseTensorPtr> &outputs) { outputs_ = outputs; } set_stream_id(size_t stream_id)66 void set_stream_id(size_t stream_id) { stream_id_ = stream_id; } stream_id()67 size_t stream_id() const { return stream_id_; } output_value_simple_info()68 ValueSimpleInfoPtr output_value_simple_info() const { return output_value_simple_info_; } 69 output(const size_t & idx)70 const tensor::BaseTensorPtr &output(const size_t &idx) { 71 if (idx >= outputs_.size()) { 72 MS_LOG(EXCEPTION) << "idx is out of bounds, idx:" << idx << ", outputs_.size():" << outputs_.size(); 73 } 74 return outputs_[idx]; 75 } 76 77 // For view op used SetOutputAbstract()78 void SetOutputAbstract() { output_abs_ = kAbstractConverter.ConvertAbstract(output(kIndex0)); } 79 80 // For view op used SetOutputTupleAbstract()81 void SetOutputTupleAbstract() { 82 AbstractBasePtrList abs_list; 83 for (const auto &output : outputs_) { 84 const auto &abs = kAbstractConverter.ConvertAbstract(output); 85 (void)abs_list.emplace_back(abs); 86 } 87 output_abs_ = std::make_shared<abstract::AbstractTuple>(abs_list); 88 } 89 90 template <typename... T> GenerateInputAbstract(T &...args)91 void GenerateInputAbstract(T &... args) { 92 input_abs_.clear(); 93 (input_abs_.emplace_back(kAbstractConverter.ConvertAbstract(args)), ...); 94 } 95 96 // Member function for Infer and creating output tensors. 97 template <typename... T> InferOutput(T &...args)98 void InferOutput(T &... args) { 99 runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyBoostInferOutput, 100 primitive_->name(), false); 101 if (output_value_simple_info_ = ops::InferBySimple(primitive_, args...); output_value_simple_info_ != nullptr) { 102 MS_LOG(DEBUG) << "Op " << primitive_->name() << " infer by simple, get output " 103 << ValueSimpleInfoToString(*output_value_simple_info_); 104 PyBoostUtils::CreateOutputTensor(output_value_simple_info_, &outputs_); 105 return; 106 } 107 108 GenerateInputAbstract(args...); 109 output_abs_ = PyBoostUtils::InferByOpDef(primitive_, input_abs_); 110 MS_EXCEPTION_IF_NULL(output_abs_); 111 MS_LOG(DEBUG) << "PyBoost infer by abstract, get output " << output_abs_->ToString(); 112 PyBoostUtils::CreateOutputTensor(output_abs_, &outputs_); 113 kAbstractConverter.CacheAbstract(output_abs_); 114 } 115 116 // A static function used for the "customize" operator to generate the operator's output Tensor. 117 template <typename... T> InferOpOutput(const std::shared_ptr<OpRunner> & op,T &...args)118 static void InferOpOutput(const std::shared_ptr<OpRunner> &op, T &... args) { 119 op->InferOutput(args...); 120 } 121 ProfileMemoryInfo()122 void ProfileMemoryInfo() { 123 static bool enable_trace_mem = common::IsEnableAlllocConfig(common::kAllocMemoryTracker); 124 if (!(MsContext::GetInstance()->get_param<bool>(MS_CTX_ENABLE_PROF_MEM) || enable_trace_mem)) { 125 return; 126 } 127 128 PyBoostUtils::DispatchRun(std::make_shared<runtime::PyBoostDeviceTask>([primitive = primitive_]() { 129 device::tracker::CALL_MEMORY_TRACKER_WITH_FILE(AddTask, "PyNative", primitive->name(), ""); 130 })); 131 } 132 UpdateOutputShape(const BaseTensorPtr & tensor,const ShapeVector & shape)133 void UpdateOutputShape(const BaseTensorPtr &tensor, const ShapeVector &shape) { 134 tensor->set_shape(shape); 135 std::static_pointer_cast<device::DeviceAddress>(tensor->device_address())->address_common()->shape_vector_ = shape; 136 } 137 138 protected: 139 // Op primitive, may delete latter. 140 PrimitivePtr primitive_{nullptr}; 141 // Input and output abstracts for grad. 142 std::vector<AbstractBasePtr> input_abs_{}; 143 AbstractBasePtr output_abs_{nullptr}; 144 // Forward output for grad. 145 std::vector<tensor::BaseTensorPtr> outputs_{}; 146 const DeviceContext *device_context_{nullptr}; 147 // Device address promise for multi-stage pipeline. 148 std::vector<pynative::DeviceAddressPromisePtr> device_sync_promises_; 149 // Op stream id 150 size_t stream_id_{kDefaultStreamIndex}; 151 ValueSimpleInfoPtr output_value_simple_info_; 152 inline static pynative::AbstractConverter kAbstractConverter; 153 }; 154 using OpPtr = std::shared_ptr<OpRunner>; 155 } // namespace pyboost 156 } // namespace kernel 157 } // namespace mindspore 158 #endif // MINDSPORE_MINDSPORE_CCSRC_KERNEL_PYBOOST_OP_RUNNER_H_ 159